10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_runtime.cpp -- KPTS runtime support library 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "kmp.h" 140b57cec5SDimitry Andric #include "kmp_affinity.h" 150b57cec5SDimitry Andric #include "kmp_atomic.h" 160b57cec5SDimitry Andric #include "kmp_environment.h" 170b57cec5SDimitry Andric #include "kmp_error.h" 180b57cec5SDimitry Andric #include "kmp_i18n.h" 190b57cec5SDimitry Andric #include "kmp_io.h" 200b57cec5SDimitry Andric #include "kmp_itt.h" 210b57cec5SDimitry Andric #include "kmp_settings.h" 220b57cec5SDimitry Andric #include "kmp_stats.h" 230b57cec5SDimitry Andric #include "kmp_str.h" 240b57cec5SDimitry Andric #include "kmp_wait_release.h" 250b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h" 260b57cec5SDimitry Andric #include "kmp_dispatch.h" 270b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 280b57cec5SDimitry Andric #include "kmp_dispatch_hier.h" 290b57cec5SDimitry Andric #endif 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #if OMPT_SUPPORT 320b57cec5SDimitry Andric #include "ompt-specific.h" 330b57cec5SDimitry Andric #endif 34fe6060f1SDimitry Andric #if OMPD_SUPPORT 35fe6060f1SDimitry Andric #include "ompd-specific.h" 36fe6060f1SDimitry Andric #endif 370b57cec5SDimitry Andric 38d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 39e8d8bef9SDimitry Andric #include "llvm/Support/TimeProfiler.h" 
40e8d8bef9SDimitry Andric static char *ProfileTraceFile = nullptr; 41e8d8bef9SDimitry Andric #endif 42e8d8bef9SDimitry Andric 430b57cec5SDimitry Andric /* these are temporary issues to be dealt with */ 440b57cec5SDimitry Andric #define KMP_USE_PRCTL 0 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #if KMP_OS_WINDOWS 470b57cec5SDimitry Andric #include <process.h> 480b57cec5SDimitry Andric #endif 490b57cec5SDimitry Andric 50e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 51e8d8bef9SDimitry Andric // windows does not need include files as it doesn't use shared memory 52e8d8bef9SDimitry Andric #else 53e8d8bef9SDimitry Andric #include <sys/mman.h> 54e8d8bef9SDimitry Andric #include <sys/stat.h> 55e8d8bef9SDimitry Andric #include <fcntl.h> 56e8d8bef9SDimitry Andric #define SHM_SIZE 1024 57e8d8bef9SDimitry Andric #endif 58e8d8bef9SDimitry Andric 590b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 600b57cec5SDimitry Andric char const __kmp_version_alt_comp[] = 610b57cec5SDimitry Andric KMP_VERSION_PREFIX "alternative compiler support: yes"; 620b57cec5SDimitry Andric #endif /* defined(KMP_GOMP_COMPAT) */ 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric char const __kmp_version_omp_api[] = 650b57cec5SDimitry Andric KMP_VERSION_PREFIX "API version: 5.0 (201611)"; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric #ifdef KMP_DEBUG 680b57cec5SDimitry Andric char const __kmp_version_lock[] = 690b57cec5SDimitry Andric KMP_VERSION_PREFIX "lock type: run time selectable"; 700b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric #if KMP_USE_MONITOR 770b57cec5SDimitry Andric kmp_info_t __kmp_monitor; 780b57cec5SDimitry Andric #endif 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /* Forward declarations */ 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric void __kmp_cleanup(void); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, 850b57cec5SDimitry Andric int gtid); 860b57cec5SDimitry Andric static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 870b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 880b57cec5SDimitry Andric ident_t *loc); 890b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 900b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, 910b57cec5SDimitry Andric int update_master_only = 0); 920b57cec5SDimitry Andric #endif 930b57cec5SDimitry Andric static void __kmp_do_serial_initialize(void); 940b57cec5SDimitry Andric void __kmp_fork_barrier(int gtid, int tid); 950b57cec5SDimitry Andric void __kmp_join_barrier(int gtid); 960b57cec5SDimitry Andric void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, 970b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, ident_t *loc); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 1000b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); 1010b57cec5SDimitry Andric #endif 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed); 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid); 1060b57cec5SDimitry Andric #endif 1070b57cec5SDimitry Andric static void __kmp_reap_thread(kmp_info_t *thread, int is_root); 1080b57cec5SDimitry Andric kmp_info_t 
*__kmp_thread_pool_insert_pt = NULL; 1090b57cec5SDimitry Andric 110349cc55cSDimitry Andric void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, 111349cc55cSDimitry Andric int new_nthreads); 112349cc55cSDimitry Andric void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); 113349cc55cSDimitry Andric 1140b57cec5SDimitry Andric /* Calculate the identifier of the current thread */ 1150b57cec5SDimitry Andric /* fast (and somewhat portable) way to get unique identifier of executing 1160b57cec5SDimitry Andric thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ 1170b57cec5SDimitry Andric int __kmp_get_global_thread_id() { 1180b57cec5SDimitry Andric int i; 1190b57cec5SDimitry Andric kmp_info_t **other_threads; 1200b57cec5SDimitry Andric size_t stack_data; 1210b57cec5SDimitry Andric char *stack_addr; 1220b57cec5SDimitry Andric size_t stack_size; 1230b57cec5SDimitry Andric char *stack_base; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric KA_TRACE( 1260b57cec5SDimitry Andric 1000, 1270b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", 1280b57cec5SDimitry Andric __kmp_nth, __kmp_all_nth)); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to 1310b57cec5SDimitry Andric a parallel region, made it return KMP_GTID_DNE to force serial_initialize 1320b57cec5SDimitry Andric by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee 1330b57cec5SDimitry Andric __kmp_init_gtid for this to work. 
*/ 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric if (!TCR_4(__kmp_init_gtid)) 1360b57cec5SDimitry Andric return KMP_GTID_DNE; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 1390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 1400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); 1410b57cec5SDimitry Andric return __kmp_gtid; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric #endif 1440b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 1450b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); 1460b57cec5SDimitry Andric return __kmp_gtid_get_specific(); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric stack_addr = (char *)&stack_data; 1510b57cec5SDimitry Andric other_threads = __kmp_threads; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric /* ATT: The code below is a source of potential bugs due to unsynchronized 1540b57cec5SDimitry Andric access to __kmp_threads array. For example: 1550b57cec5SDimitry Andric 1. Current thread loads other_threads[i] to thr and checks it, it is 1560b57cec5SDimitry Andric non-NULL. 1570b57cec5SDimitry Andric 2. Current thread is suspended by OS. 1580b57cec5SDimitry Andric 3. Another thread unregisters and finishes (debug versions of free() 1590b57cec5SDimitry Andric may fill memory with something like 0xEF). 1600b57cec5SDimitry Andric 4. Current thread is resumed. 1610b57cec5SDimitry Andric 5. Current thread reads junk from *thr. 1620b57cec5SDimitry Andric TODO: Fix it. 
--ln */ 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); 1670b57cec5SDimitry Andric if (!thr) 1680b57cec5SDimitry Andric continue; 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); 1710b57cec5SDimitry Andric stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric /* stack grows down -- search through all of the active threads */ 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (stack_addr <= stack_base) { 1760b57cec5SDimitry Andric size_t stack_diff = stack_base - stack_addr; 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric if (stack_diff <= stack_size) { 1790b57cec5SDimitry Andric /* The only way we can be closer than the allocated */ 1800b57cec5SDimitry Andric /* stack size is if we are running on this thread. */ 1810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); 1820b57cec5SDimitry Andric return i; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* get specific to try and determine our gtid */ 1880b57cec5SDimitry Andric KA_TRACE(1000, 1890b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: internal alg. 
failed to find " 1900b57cec5SDimitry Andric "thread, using TLS\n")); 1910b57cec5SDimitry Andric i = __kmp_gtid_get_specific(); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric /* if we havn't been assigned a gtid, then return code */ 1960b57cec5SDimitry Andric if (i < 0) 1970b57cec5SDimitry Andric return i; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /* dynamically updated stack window for uber threads to avoid get_specific 2000b57cec5SDimitry Andric call */ 2010b57cec5SDimitry Andric if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { 2020b57cec5SDimitry Andric KMP_FATAL(StackOverflow, i); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2060b57cec5SDimitry Andric if (stack_addr > stack_base) { 2070b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 2080b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2090b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - 2100b57cec5SDimitry Andric stack_base); 2110b57cec5SDimitry Andric } else { 2120b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2130b57cec5SDimitry Andric stack_base - stack_addr); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /* Reprint stack bounds for ubermaster since they have been refined */ 2170b57cec5SDimitry Andric if (__kmp_storage_map) { 2180b57cec5SDimitry Andric char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2190b57cec5SDimitry Andric char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 2200b57cec5SDimitry Andric __kmp_print_storage_map_gtid(i, stack_beg, stack_end, 2210b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize, 2220b57cec5SDimitry Andric 
"th_%d stack (refinement)", i); 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric return i; 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric int __kmp_get_global_thread_id_reg() { 2280b57cec5SDimitry Andric int gtid; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric if (!__kmp_init_serial) { 2310b57cec5SDimitry Andric gtid = KMP_GTID_DNE; 2320b57cec5SDimitry Andric } else 2330b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2340b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 2350b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); 2360b57cec5SDimitry Andric gtid = __kmp_gtid; 2370b57cec5SDimitry Andric } else 2380b57cec5SDimitry Andric #endif 2390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 2400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); 2410b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2420b57cec5SDimitry Andric } else { 2430b57cec5SDimitry Andric KA_TRACE(1000, 2440b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); 2450b57cec5SDimitry Andric gtid = __kmp_get_global_thread_id(); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /* we must be a new uber master sibling thread */ 2490b57cec5SDimitry Andric if (gtid == KMP_GTID_DNE) { 2500b57cec5SDimitry Andric KA_TRACE(10, 2510b57cec5SDimitry Andric ("__kmp_get_global_thread_id_reg: Encountered new root thread. 
" 2520b57cec5SDimitry Andric "Registering a new gtid.\n")); 2530b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 2540b57cec5SDimitry Andric if (!__kmp_init_serial) { 2550b57cec5SDimitry Andric __kmp_do_serial_initialize(); 2560b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2570b57cec5SDimitry Andric } else { 2580b57cec5SDimitry Andric gtid = __kmp_register_root(FALSE); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 2610b57cec5SDimitry Andric /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric return gtid; 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric /* caller must hold forkjoin_lock */ 2700b57cec5SDimitry Andric void __kmp_check_stack_overlap(kmp_info_t *th) { 2710b57cec5SDimitry Andric int f; 2720b57cec5SDimitry Andric char *stack_beg = NULL; 2730b57cec5SDimitry Andric char *stack_end = NULL; 2740b57cec5SDimitry Andric int gtid; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); 2770b57cec5SDimitry Andric if (__kmp_storage_map) { 2780b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2790b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric if (gtid == KMP_GTID_MONITOR) { 2840b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2850b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2860b57cec5SDimitry Andric "th_%s stack (%s)", "mon", 2870b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); 2880b57cec5SDimitry Andric } else { 2890b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2900b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2910b57cec5SDimitry Andric "th_%d stack (%s)", gtid, 2920b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric /* No point in checking ubermaster threads since they use refinement and 2970b57cec5SDimitry Andric * cannot overlap */ 2980b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2990b57cec5SDimitry Andric if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 3000b57cec5SDimitry Andric KA_TRACE(10, 3010b57cec5SDimitry Andric ("__kmp_check_stack_overlap: performing extensive checking\n")); 3020b57cec5SDimitry Andric if (stack_beg == NULL) { 3030b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 3040b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 3080b57cec5SDimitry Andric kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric if (f_th && f_th != th) { 3110b57cec5SDimitry Andric char *other_stack_end = 3120b57cec5SDimitry Andric (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 3130b57cec5SDimitry Andric char *other_stack_beg = 3140b57cec5SDimitry Andric other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 3150b57cec5SDimitry Andric if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 3160b57cec5SDimitry Andric (stack_end > other_stack_beg && stack_end < other_stack_end)) { 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric /* Print the other stack values before the abort */ 3190b57cec5SDimitry Andric if (__kmp_storage_map) 
3200b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 3210b57cec5SDimitry Andric -1, other_stack_beg, other_stack_end, 3220b57cec5SDimitry Andric (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 3230b57cec5SDimitry Andric "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), 3260b57cec5SDimitry Andric __kmp_msg_null); 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric void __kmp_infinite_loop(void) { 3370b57cec5SDimitry Andric static int done = FALSE; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric while (!done) { 3400b57cec5SDimitry Andric KMP_YIELD(TRUE); 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric #define MAX_MESSAGE 512 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 3470b57cec5SDimitry Andric char const *format, ...) 
{ 3480b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3490b57cec5SDimitry Andric va_list ap; 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric va_start(ap, format); 3520b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 3530b57cec5SDimitry Andric p2, (unsigned long)size, format); 3540b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 3550b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 3560b57cec5SDimitry Andric #if KMP_PRINT_DATA_PLACEMENT 3570b57cec5SDimitry Andric int node; 3580b57cec5SDimitry Andric if (gtid >= 0) { 3590b57cec5SDimitry Andric if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 3600b57cec5SDimitry Andric if (__kmp_storage_map_verbose) { 3610b57cec5SDimitry Andric node = __kmp_get_host_node(p1); 3620b57cec5SDimitry Andric if (node < 0) /* doesn't work, so don't try this next time */ 3630b57cec5SDimitry Andric __kmp_storage_map_verbose = FALSE; 3640b57cec5SDimitry Andric else { 3650b57cec5SDimitry Andric char *last; 3660b57cec5SDimitry Andric int lastNode; 3670b57cec5SDimitry Andric int localProc = __kmp_get_cpu_from_gtid(gtid); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric const int page_size = KMP_GET_PAGE_SIZE(); 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 3720b57cec5SDimitry Andric p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 3730b57cec5SDimitry Andric if (localProc >= 0) 3740b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 3750b57cec5SDimitry Andric localProc >> 1); 3760b57cec5SDimitry Andric else 3770b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d\n", gtid); 3780b57cec5SDimitry Andric #if KMP_USE_PRCTL 3790b57cec5SDimitry Andric /* The more elaborate format is disabled for now because of the prctl 3800b57cec5SDimitry Andric * hanging bug. 
*/ 3810b57cec5SDimitry Andric do { 3820b57cec5SDimitry Andric last = p1; 3830b57cec5SDimitry Andric lastNode = node; 3840b57cec5SDimitry Andric /* This loop collates adjacent pages with the same host node. */ 3850b57cec5SDimitry Andric do { 3860b57cec5SDimitry Andric (char *)p1 += page_size; 3870b57cec5SDimitry Andric } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 3880b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 3890b57cec5SDimitry Andric lastNode); 3900b57cec5SDimitry Andric } while (p1 <= p2); 3910b57cec5SDimitry Andric #else 3920b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 3930b57cec5SDimitry Andric (char *)p1 + (page_size - 1), 3940b57cec5SDimitry Andric __kmp_get_host_node(p1)); 3950b57cec5SDimitry Andric if (p1 < p2) { 3960b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 3970b57cec5SDimitry Andric (char *)p2 + (page_size - 1), 3980b57cec5SDimitry Andric __kmp_get_host_node(p2)); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric #endif 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric } else 4040b57cec5SDimitry Andric __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric #endif /* KMP_PRINT_DATA_PLACEMENT */ 4070b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric void __kmp_warn(char const *format, ...) 
{ 4110b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 4120b57cec5SDimitry Andric va_list ap; 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric if (__kmp_generate_warnings == kmp_warnings_off) { 4150b57cec5SDimitry Andric return; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric va_start(ap, format); 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 4210b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 4220b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 4230b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric va_end(ap); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric void __kmp_abort_process() { 4290b57cec5SDimitry Andric // Later threads may stall here, but that's ok because abort() will kill them. 4300b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric if (__kmp_debug_buf) { 4330b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric if (KMP_OS_WINDOWS) { 4370b57cec5SDimitry Andric // Let other threads know of abnormal termination and prevent deadlock 4380b57cec5SDimitry Andric // if abort happened during library initialization or shutdown 4390b57cec5SDimitry Andric __kmp_global.g.g_abort = SIGABRT; 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric /* On Windows* OS by default abort() causes pop-up error box, which stalls 4420b57cec5SDimitry Andric nightly testing. Unfortunately, we cannot reliably suppress pop-up error 4430b57cec5SDimitry Andric boxes. _set_abort_behavior() works well, but this function is not 4440b57cec5SDimitry Andric available in VS7 (this is not problem for DLL, but it is a problem for 4450b57cec5SDimitry Andric static OpenMP RTL). 
SetErrorMode (and so, timelimit utility) does not 4460b57cec5SDimitry Andric help, at least in some versions of MS C RTL. 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric It seems following sequence is the only way to simulate abort() and 4490b57cec5SDimitry Andric avoid pop-up error box. */ 4500b57cec5SDimitry Andric raise(SIGABRT); 4510b57cec5SDimitry Andric _exit(3); // Just in case, if signal ignored, exit anyway. 4520b57cec5SDimitry Andric } else { 453e8d8bef9SDimitry Andric __kmp_unregister_library(); 4540b57cec5SDimitry Andric abort(); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric __kmp_infinite_loop(); 4580b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_exit_lock); 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric } // __kmp_abort_process 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric void __kmp_abort_thread(void) { 4630b57cec5SDimitry Andric // TODO: Eliminate g_abort global variable and this function. 4640b57cec5SDimitry Andric // In case of abort just call abort(), it will kill all the threads. 4650b57cec5SDimitry Andric __kmp_infinite_loop(); 4660b57cec5SDimitry Andric } // __kmp_abort_thread 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric /* Print out the storage map for the major kmp_info_t thread data structures 4690b57cec5SDimitry Andric that are allocated together. 
*/ 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 4720b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 4730b57cec5SDimitry Andric gtid); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 4760b57cec5SDimitry Andric sizeof(kmp_desc_t), "th_%d.th_info", gtid); 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 4790b57cec5SDimitry Andric sizeof(kmp_local_t), "th_%d.th_local", gtid); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 4820b57cec5SDimitry Andric gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 4830b57cec5SDimitry Andric sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 4860b57cec5SDimitry Andric &thr->th.th_bar[bs_plain_barrier + 1], 4870b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 4880b57cec5SDimitry Andric gtid); 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 4910b57cec5SDimitry Andric &thr->th.th_bar[bs_forkjoin_barrier + 1], 4920b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 4930b57cec5SDimitry Andric gtid); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 4960b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 4970b57cec5SDimitry Andric &thr->th.th_bar[bs_reduction_barrier + 1], 4980b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 4990b57cec5SDimitry Andric gtid); 5000b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 5010b57cec5SDimitry Andric } 
5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric /* Print out the storage map for the major kmp_team_t team data structures 5040b57cec5SDimitry Andric that are allocated together. */ 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, 5070b57cec5SDimitry Andric int team_id, int num_thr) { 5080b57cec5SDimitry Andric int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 5090b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 5100b57cec5SDimitry Andric header, team_id); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], 5130b57cec5SDimitry Andric &team->t.t_bar[bs_last_barrier], 5140b57cec5SDimitry Andric sizeof(kmp_balign_team_t) * bs_last_barrier, 5150b57cec5SDimitry Andric "%s_%d.t_bar", header, team_id); 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], 5180b57cec5SDimitry Andric &team->t.t_bar[bs_plain_barrier + 1], 5190b57cec5SDimitry Andric sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", 5200b57cec5SDimitry Andric header, team_id); 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], 5230b57cec5SDimitry Andric &team->t.t_bar[bs_forkjoin_barrier + 1], 5240b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5250b57cec5SDimitry Andric "%s_%d.t_bar[forkjoin]", header, team_id); 5260b57cec5SDimitry Andric 5270b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 5280b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], 5290b57cec5SDimitry Andric &team->t.t_bar[bs_reduction_barrier + 1], 5300b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5310b57cec5SDimitry Andric "%s_%d.t_bar[reduction]", header, team_id); 5320b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 
5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5350b57cec5SDimitry Andric -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], 5360b57cec5SDimitry Andric sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5390b57cec5SDimitry Andric -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], 5400b57cec5SDimitry Andric sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], 5430b57cec5SDimitry Andric &team->t.t_disp_buffer[num_disp_buff], 5440b57cec5SDimitry Andric sizeof(dispatch_shared_info_t) * num_disp_buff, 5450b57cec5SDimitry Andric "%s_%d.t_disp_buffer", header, team_id); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 548fe6060f1SDimitry Andric static void __kmp_init_allocator() { 549fe6060f1SDimitry Andric __kmp_init_memkind(); 550fe6060f1SDimitry Andric __kmp_init_target_mem(); 551fe6060f1SDimitry Andric } 5520b57cec5SDimitry Andric static void __kmp_fini_allocator() { __kmp_fini_memkind(); } 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric #if KMP_DYNAMIC_LIB 5570b57cec5SDimitry Andric #if KMP_OS_WINDOWS 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) { 5600b57cec5SDimitry Andric //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric switch (fdwReason) { 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric case DLL_PROCESS_ATTACH: 5650b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n")); 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric return TRUE; 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric 
case DLL_PROCESS_DETACH: 5700b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific())); 5710b57cec5SDimitry Andric 572fe6060f1SDimitry Andric // According to Windows* documentation for DllMain entry point: 573fe6060f1SDimitry Andric // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference: 574fe6060f1SDimitry Andric // lpReserved == NULL when FreeLibrary() is called, 575fe6060f1SDimitry Andric // lpReserved != NULL when the process is terminated. 576fe6060f1SDimitry Andric // When FreeLibrary() is called, worker threads remain alive. So the 577fe6060f1SDimitry Andric // runtime's state is consistent and executing proper shutdown is OK. 578fe6060f1SDimitry Andric // When the process is terminated, worker threads have exited or been 579fe6060f1SDimitry Andric // forcefully terminated by the OS and only the shutdown thread remains. 580fe6060f1SDimitry Andric // This can leave the runtime in an inconsistent state. 581fe6060f1SDimitry Andric // Hence, only attempt proper cleanup when FreeLibrary() is called. 582fe6060f1SDimitry Andric // Otherwise, rely on OS to reclaim resources. 
583fe6060f1SDimitry Andric if (lpReserved == NULL) 5840b57cec5SDimitry Andric __kmp_internal_end_library(__kmp_gtid_get_specific()); 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric return TRUE; 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric case DLL_THREAD_ATTACH: 5890b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: THREAD_ATTACH\n")); 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric /* if we want to register new siblings all the time here call 5920b57cec5SDimitry Andric * __kmp_get_gtid(); */ 5930b57cec5SDimitry Andric return TRUE; 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric case DLL_THREAD_DETACH: 5960b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific())); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric __kmp_internal_end_thread(__kmp_gtid_get_specific()); 5990b57cec5SDimitry Andric return TRUE; 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric return TRUE; 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 6060b57cec5SDimitry Andric #endif /* KMP_DYNAMIC_LIB */ 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric /* __kmp_parallel_deo -- Wait until it's our turn. 
*/ 6090b57cec5SDimitry Andric void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 6100b57cec5SDimitry Andric int gtid = *gtid_ref; 6110b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6120b57cec5SDimitry Andric kmp_team_t *team = __kmp_team_from_gtid(gtid); 6130b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6140b57cec5SDimitry Andric 6150b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 6160b57cec5SDimitry Andric if (__kmp_threads[gtid]->th.th_root->r.r_active) 6170b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 6180b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); 6190b57cec5SDimitry Andric #else 6200b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); 6210b57cec5SDimitry Andric #endif 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6240b57cec5SDimitry Andric if (!team->t.t_serialized) { 6250b57cec5SDimitry Andric KMP_MB(); 6260b57cec5SDimitry Andric KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ, 6270b57cec5SDimitry Andric NULL); 6280b57cec5SDimitry Andric KMP_MB(); 6290b57cec5SDimitry Andric } 6300b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric /* __kmp_parallel_dxo -- Signal the next task. 
*/ 6340b57cec5SDimitry Andric void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 6350b57cec5SDimitry Andric int gtid = *gtid_ref; 6360b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6370b57cec5SDimitry Andric int tid = __kmp_tid_from_gtid(gtid); 6380b57cec5SDimitry Andric kmp_team_t *team = __kmp_team_from_gtid(gtid); 6390b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 6420b57cec5SDimitry Andric if (__kmp_threads[gtid]->th.th_root->r.r_active) 6430b57cec5SDimitry Andric __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref); 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6460b57cec5SDimitry Andric if (!team->t.t_serialized) { 6470b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric /* use the tid of the next thread in this team */ 6500b57cec5SDimitry Andric /* TODO replace with general release procedure */ 6510b57cec5SDimitry Andric team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 6590b57cec5SDimitry Andric /* The BARRIER for a SINGLE process section is always explicit */ 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) { 6620b57cec5SDimitry Andric int status; 6630b57cec5SDimitry Andric kmp_info_t *th; 6640b57cec5SDimitry Andric kmp_team_t *team; 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 6670b57cec5SDimitry Andric __kmp_parallel_initialize(); 6680b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric th = __kmp_threads[gtid]; 6710b57cec5SDimitry Andric team = th->th.th_team; 6720b57cec5SDimitry Andric status = 0; 6730b57cec5SDimitry Andric 6740b57cec5SDimitry Andric th->th.th_ident = id_ref; 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric if (team->t.t_serialized) { 6770b57cec5SDimitry Andric status = 1; 6780b57cec5SDimitry Andric } else { 6790b57cec5SDimitry Andric kmp_int32 old_this = th->th.th_local.this_construct; 6800b57cec5SDimitry Andric 6810b57cec5SDimitry Andric ++th->th.th_local.this_construct; 6820b57cec5SDimitry Andric /* try to set team count to thread count--success means thread got the 6830b57cec5SDimitry Andric single block */ 6840b57cec5SDimitry Andric /* TODO: Should this be acquire or release? 
*/ 6850b57cec5SDimitry Andric if (team->t.t_construct == old_this) { 6860b57cec5SDimitry Andric status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this, 6870b57cec5SDimitry Andric th->th.th_local.this_construct); 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric #if USE_ITT_BUILD 6900b57cec5SDimitry Andric if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && 6910b57cec5SDimitry Andric KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL && 692fe6060f1SDimitry Andric team->t.t_active_level == 1) { 693fe6060f1SDimitry Andric // Only report metadata by primary thread of active team at level 1 6940b57cec5SDimitry Andric __kmp_itt_metadata_single(id_ref); 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 7000b57cec5SDimitry Andric if (status && push_ws) { 7010b57cec5SDimitry Andric __kmp_push_workshare(gtid, ct_psingle, id_ref); 7020b57cec5SDimitry Andric } else { 7030b57cec5SDimitry Andric __kmp_check_workshare(gtid, ct_psingle, id_ref); 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric #if USE_ITT_BUILD 7070b57cec5SDimitry Andric if (status) { 7080b57cec5SDimitry Andric __kmp_itt_single_start(gtid); 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 7110b57cec5SDimitry Andric return status; 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric void __kmp_exit_single(int gtid) { 7150b57cec5SDimitry Andric #if USE_ITT_BUILD 7160b57cec5SDimitry Andric __kmp_itt_single_end(gtid); 7170b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 7180b57cec5SDimitry Andric if (__kmp_env_consistency_check) 7190b57cec5SDimitry Andric __kmp_pop_workshare(gtid, ct_psingle, NULL); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric /* determine if we can go parallel or must use a 
serialized parallel region and 7230b57cec5SDimitry Andric * how many threads we can use 7240b57cec5SDimitry Andric * set_nproc is the number of threads requested for the team 7250b57cec5SDimitry Andric * returns 0 if we should serialize or only use one thread, 7260b57cec5SDimitry Andric * otherwise the number of threads to use 7270b57cec5SDimitry Andric * The forkjoin lock is held by the caller. */ 7280b57cec5SDimitry Andric static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, 7290b57cec5SDimitry Andric int master_tid, int set_nthreads, 7300b57cec5SDimitry Andric int enter_teams) { 7310b57cec5SDimitry Andric int capacity; 7320b57cec5SDimitry Andric int new_nthreads; 7330b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 7340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root && parent_team); 7350b57cec5SDimitry Andric kmp_info_t *this_thr = parent_team->t.t_threads[master_tid]; 7360b57cec5SDimitry Andric 7370b57cec5SDimitry Andric // If dyn-var is set, dynamically adjust the number of desired threads, 7380b57cec5SDimitry Andric // according to the method specified by dynamic_mode. 
7390b57cec5SDimitry Andric new_nthreads = set_nthreads; 7400b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid)) { 7410b57cec5SDimitry Andric ; 7420b57cec5SDimitry Andric } 7430b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 7440b57cec5SDimitry Andric else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { 7450b57cec5SDimitry Andric new_nthreads = __kmp_load_balance_nproc(root, set_nthreads); 7460b57cec5SDimitry Andric if (new_nthreads == 1) { 7470b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 7480b57cec5SDimitry Andric "reservation to 1 thread\n", 7490b57cec5SDimitry Andric master_tid)); 7500b57cec5SDimitry Andric return 1; 7510b57cec5SDimitry Andric } 7520b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7530b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 7540b57cec5SDimitry Andric "reservation to %d threads\n", 7550b57cec5SDimitry Andric master_tid, new_nthreads)); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric } 7580b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 7590b57cec5SDimitry Andric else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { 7600b57cec5SDimitry Andric new_nthreads = __kmp_avail_proc - __kmp_nth + 7610b57cec5SDimitry Andric (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc); 7620b57cec5SDimitry Andric if (new_nthreads <= 1) { 7630b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 7640b57cec5SDimitry Andric "reservation to 1 thread\n", 7650b57cec5SDimitry Andric master_tid)); 7660b57cec5SDimitry Andric return 1; 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7690b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 7700b57cec5SDimitry Andric "reservation to %d threads\n", 7710b57cec5SDimitry Andric master_tid, new_nthreads)); 7720b57cec5SDimitry Andric } else { 7730b57cec5SDimitry Andric new_nthreads = set_nthreads; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { 7760b57cec5SDimitry Andric if (set_nthreads > 2) { 7770b57cec5SDimitry Andric new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]); 7780b57cec5SDimitry Andric new_nthreads = (new_nthreads % set_nthreads) + 1; 7790b57cec5SDimitry Andric if (new_nthreads == 1) { 7800b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 7810b57cec5SDimitry Andric "reservation to 1 thread\n", 7820b57cec5SDimitry Andric master_tid)); 7830b57cec5SDimitry Andric return 1; 7840b57cec5SDimitry Andric } 7850b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7860b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 7870b57cec5SDimitry Andric "reservation to %d threads\n", 7880b57cec5SDimitry Andric master_tid, new_nthreads)); 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric } else { 7920b57cec5SDimitry Andric KMP_ASSERT(0); 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT. 
7960b57cec5SDimitry Andric if (__kmp_nth + new_nthreads - 7970b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 7980b57cec5SDimitry Andric __kmp_max_nth) { 7990b57cec5SDimitry Andric int tl_nthreads = __kmp_max_nth - __kmp_nth + 8000b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 8010b57cec5SDimitry Andric if (tl_nthreads <= 0) { 8020b57cec5SDimitry Andric tl_nthreads = 1; 8030b57cec5SDimitry Andric } 8040b57cec5SDimitry Andric 8050b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 8060b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8070b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8080b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8090b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), 8100b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 8110b57cec5SDimitry Andric } 8120b57cec5SDimitry Andric if (tl_nthreads == 1) { 8130b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT " 8140b57cec5SDimitry Andric "reduced reservation to 1 thread\n", 8150b57cec5SDimitry Andric master_tid)); 8160b57cec5SDimitry Andric return 1; 8170b57cec5SDimitry Andric } 8180b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced " 8190b57cec5SDimitry Andric "reservation to %d threads\n", 8200b57cec5SDimitry Andric master_tid, tl_nthreads)); 8210b57cec5SDimitry Andric new_nthreads = tl_nthreads; 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric // Respect OMP_THREAD_LIMIT 8250b57cec5SDimitry Andric int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads; 8260b57cec5SDimitry Andric int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit; 8270b57cec5SDimitry Andric if (cg_nthreads + new_nthreads - 8280b57cec5SDimitry Andric (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc) > 8290b57cec5SDimitry Andric max_cg_threads) { 8300b57cec5SDimitry Andric int tl_nthreads = max_cg_threads - cg_nthreads + 8310b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 8320b57cec5SDimitry Andric if (tl_nthreads <= 0) { 8330b57cec5SDimitry Andric tl_nthreads = 1; 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 8370b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8380b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8390b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8400b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), 8410b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 8420b57cec5SDimitry Andric } 8430b57cec5SDimitry Andric if (tl_nthreads == 1) { 8440b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT " 8450b57cec5SDimitry Andric "reduced reservation to 1 thread\n", 8460b57cec5SDimitry Andric master_tid)); 8470b57cec5SDimitry Andric return 1; 8480b57cec5SDimitry Andric } 8490b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced " 8500b57cec5SDimitry Andric "reservation to %d threads\n", 8510b57cec5SDimitry Andric master_tid, tl_nthreads)); 8520b57cec5SDimitry Andric new_nthreads = tl_nthreads; 8530b57cec5SDimitry Andric } 8540b57cec5SDimitry Andric 8550b57cec5SDimitry Andric // Check if the threads array is large enough, or needs expanding. 8560b57cec5SDimitry Andric // See comment in __kmp_register_root() about the adjustment if 8570b57cec5SDimitry Andric // __kmp_threads[0] == NULL. 
8580b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 8590b57cec5SDimitry Andric if (TCR_PTR(__kmp_threads[0]) == NULL) { 8600b57cec5SDimitry Andric --capacity; 8610b57cec5SDimitry Andric } 862d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 863d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 864d409305fSDimitry Andric // in __kmp_threads_capacity. 865d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 866d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 867d409305fSDimitry Andric } 8680b57cec5SDimitry Andric if (__kmp_nth + new_nthreads - 8690b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 8700b57cec5SDimitry Andric capacity) { 8710b57cec5SDimitry Andric // Expand the threads array. 8720b57cec5SDimitry Andric int slotsRequired = __kmp_nth + new_nthreads - 8730b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) - 8740b57cec5SDimitry Andric capacity; 8750b57cec5SDimitry Andric int slotsAdded = __kmp_expand_threads(slotsRequired); 8760b57cec5SDimitry Andric if (slotsAdded < slotsRequired) { 8770b57cec5SDimitry Andric // The threads array was not expanded enough. 8780b57cec5SDimitry Andric new_nthreads -= (slotsRequired - slotsAdded); 8790b57cec5SDimitry Andric KMP_ASSERT(new_nthreads >= 1); 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 
8820b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8830b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8840b57cec5SDimitry Andric if (__kmp_tp_cached) { 8850b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8860b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 8870b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 8880b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 8890b57cec5SDimitry Andric } else { 8900b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8910b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 8920b57cec5SDimitry Andric KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); 8930b57cec5SDimitry Andric } 8940b57cec5SDimitry Andric } 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric } 8970b57cec5SDimitry Andric 8980b57cec5SDimitry Andric #ifdef KMP_DEBUG 8990b57cec5SDimitry Andric if (new_nthreads == 1) { 9000b57cec5SDimitry Andric KC_TRACE(10, 9010b57cec5SDimitry Andric ("__kmp_reserve_threads: T#%d serializing team after reclaiming " 9020b57cec5SDimitry Andric "dead roots and rechecking; requested %d threads\n", 9030b57cec5SDimitry Andric __kmp_get_gtid(), set_nthreads)); 9040b57cec5SDimitry Andric } else { 9050b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested" 9060b57cec5SDimitry Andric " %d threads\n", 9070b57cec5SDimitry Andric __kmp_get_gtid(), new_nthreads, set_nthreads)); 9080b57cec5SDimitry Andric } 9090b57cec5SDimitry Andric #endif // KMP_DEBUG 9100b57cec5SDimitry Andric return new_nthreads; 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric /* Allocate threads from the thread pool and assign them to the new team. 
We are 9140b57cec5SDimitry Andric assured that there are enough threads available, because we checked on that 9150b57cec5SDimitry Andric earlier within critical section forkjoin */ 9160b57cec5SDimitry Andric static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, 917349cc55cSDimitry Andric kmp_info_t *master_th, int master_gtid, 918349cc55cSDimitry Andric int fork_teams_workers) { 9190b57cec5SDimitry Andric int i; 9200b57cec5SDimitry Andric int use_hot_team; 9210b57cec5SDimitry Andric 9220b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc)); 9230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid()); 9240b57cec5SDimitry Andric KMP_MB(); 9250b57cec5SDimitry Andric 926fe6060f1SDimitry Andric /* first, let's setup the primary thread */ 9270b57cec5SDimitry Andric master_th->th.th_info.ds.ds_tid = 0; 9280b57cec5SDimitry Andric master_th->th.th_team = team; 9290b57cec5SDimitry Andric master_th->th.th_team_nproc = team->t.t_nproc; 9300b57cec5SDimitry Andric master_th->th.th_team_master = master_th; 9310b57cec5SDimitry Andric master_th->th.th_team_serialized = FALSE; 9320b57cec5SDimitry Andric master_th->th.th_dispatch = &team->t.t_dispatch[0]; 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric /* make sure we are not the optimized hot team */ 9350b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 9360b57cec5SDimitry Andric use_hot_team = 0; 9370b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; 9380b57cec5SDimitry Andric if (hot_teams) { // hot teams array is not allocated if 9390b57cec5SDimitry Andric // KMP_HOT_TEAMS_MAX_LEVEL=0 9400b57cec5SDimitry Andric int level = team->t.t_active_level - 1; // index in array of hot teams 9410b57cec5SDimitry Andric if (master_th->th.th_teams_microtask) { // are we inside the teams? 
9420b57cec5SDimitry Andric if (master_th->th.th_teams_size.nteams > 1) { 9430b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 9440b57cec5SDimitry Andric // team_of_masters 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 9470b57cec5SDimitry Andric master_th->th.th_teams_level == team->t.t_level) { 9480b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 9490b57cec5SDimitry Andric // team_of_workers before the parallel 9500b57cec5SDimitry Andric } // team->t.t_level will be increased inside parallel 9510b57cec5SDimitry Andric } 9520b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level) { 9530b57cec5SDimitry Andric if (hot_teams[level].hot_team) { 9540b57cec5SDimitry Andric // hot team has already been allocated for given level 9550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); 9560b57cec5SDimitry Andric use_hot_team = 1; // the team is ready to use 9570b57cec5SDimitry Andric } else { 9580b57cec5SDimitry Andric use_hot_team = 0; // AC: threads are not allocated yet 9590b57cec5SDimitry Andric hot_teams[level].hot_team = team; // remember new hot team 9600b57cec5SDimitry Andric hot_teams[level].hot_team_nth = team->t.t_nproc; 9610b57cec5SDimitry Andric } 9620b57cec5SDimitry Andric } else { 9630b57cec5SDimitry Andric use_hot_team = 0; 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric } 9660b57cec5SDimitry Andric #else 9670b57cec5SDimitry Andric use_hot_team = team == root->r.r_hot_team; 9680b57cec5SDimitry Andric #endif 9690b57cec5SDimitry Andric if (!use_hot_team) { 9700b57cec5SDimitry Andric 971fe6060f1SDimitry Andric /* install the primary thread */ 9720b57cec5SDimitry Andric team->t.t_threads[0] = master_th; 9730b57cec5SDimitry Andric __kmp_initialize_info(master_th, team, 0, master_gtid); 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric /* now, install the worker threads */ 9760b57cec5SDimitry 
Andric for (i = 1; i < team->t.t_nproc; i++) { 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric /* fork or reallocate a new thread and install it in team */ 9790b57cec5SDimitry Andric kmp_info_t *thr = __kmp_allocate_thread(root, team, i); 9800b57cec5SDimitry Andric team->t.t_threads[i] = thr; 9810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr); 9820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr->th.th_team == team); 9830b57cec5SDimitry Andric /* align team and thread arrived states */ 9840b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived " 9850b57cec5SDimitry Andric "T#%d(%d:%d) join =%llu, plain=%llu\n", 9860b57cec5SDimitry Andric __kmp_gtid_from_tid(0, team), team->t.t_id, 0, 9870b57cec5SDimitry Andric __kmp_gtid_from_tid(i, team), team->t.t_id, i, 9880b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 9890b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 9900b57cec5SDimitry Andric thr->th.th_teams_microtask = master_th->th.th_teams_microtask; 9910b57cec5SDimitry Andric thr->th.th_teams_level = master_th->th.th_teams_level; 9920b57cec5SDimitry Andric thr->th.th_teams_size = master_th->th.th_teams_size; 9930b57cec5SDimitry Andric { // Initialize threads' barrier data. 
9940b57cec5SDimitry Andric int b; 9950b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar; 9960b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 9970b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 9980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 9990b57cec5SDimitry Andric #if USE_DEBUGGER 10000b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 10010b57cec5SDimitry Andric #endif 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric } 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric 10060b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 1007349cc55cSDimitry Andric // Do not partition the places list for teams construct workers who 1008349cc55cSDimitry Andric // haven't actually been forked to do real work yet. This partitioning 1009349cc55cSDimitry Andric // will take place in the parallel region nested within the teams construct. 
1010349cc55cSDimitry Andric if (!fork_teams_workers) { 10110b57cec5SDimitry Andric __kmp_partition_places(team); 1012349cc55cSDimitry Andric } 10130b57cec5SDimitry Andric #endif 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric if (__kmp_display_affinity && team->t.t_display_affinity != 1) { 10170b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; i++) { 10180b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[i]; 10190b57cec5SDimitry Andric if (thr->th.th_prev_num_threads != team->t.t_nproc || 10200b57cec5SDimitry Andric thr->th.th_prev_level != team->t.t_level) { 10210b57cec5SDimitry Andric team->t.t_display_affinity = 1; 10220b57cec5SDimitry Andric break; 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric } 10250b57cec5SDimitry Andric } 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric KMP_MB(); 10280b57cec5SDimitry Andric } 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 10310b57cec5SDimitry Andric // Propagate any changes to the floating point control registers out to the team 10320b57cec5SDimitry Andric // We try to avoid unnecessary writes to the relevant cache line in the team 10330b57cec5SDimitry Andric // structure, so we don't make changes unless they are needed. 10340b57cec5SDimitry Andric inline static void propagateFPControl(kmp_team_t *team) { 10350b57cec5SDimitry Andric if (__kmp_inherit_fp_control) { 10360b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 10370b57cec5SDimitry Andric kmp_uint32 mxcsr; 10380b57cec5SDimitry Andric 1039fe6060f1SDimitry Andric // Get primary thread's values of FPU control flags (both X87 and vector) 10400b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 10410b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 10420b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric // There is no point looking at t_fp_control_saved here. 
10450b57cec5SDimitry Andric // If it is TRUE, we still have to update the values if they are different 10460b57cec5SDimitry Andric // from those we now have. If it is FALSE we didn't save anything yet, but 10470b57cec5SDimitry Andric // our objective is the same. We have to ensure that the values in the team 10480b57cec5SDimitry Andric // are the same as those we have. 10490b57cec5SDimitry Andric // So, this code achieves what we need whether or not t_fp_control_saved is 10500b57cec5SDimitry Andric // true. By checking whether the value needs updating we avoid unnecessary 10510b57cec5SDimitry Andric // writes that would put the cache-line into a written state, causing all 10520b57cec5SDimitry Andric // threads in the team to have to read it again. 10530b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); 10540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); 10550b57cec5SDimitry Andric // Although we don't use this value, other code in the runtime wants to know 10560b57cec5SDimitry Andric // whether it should restore them. So we must ensure it is correct. 10570b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); 10580b57cec5SDimitry Andric } else { 10590b57cec5SDimitry Andric // Similarly here. Don't write to this cache-line in the team structure 10600b57cec5SDimitry Andric // unless we have to. 10610b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric } 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric // Do the opposite, setting the hardware registers to the updated values from 10660b57cec5SDimitry Andric // the team. 10670b57cec5SDimitry Andric inline static void updateHWFPControl(kmp_team_t *team) { 10680b57cec5SDimitry Andric if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) { 10690b57cec5SDimitry Andric // Only reset the fp control regs if they have been changed in the team. 
10700b57cec5SDimitry Andric // the parallel region that we are exiting. 10710b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 10720b57cec5SDimitry Andric kmp_uint32 mxcsr; 10730b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 10740b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 10750b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 10760b57cec5SDimitry Andric 10770b57cec5SDimitry Andric if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) { 10780b57cec5SDimitry Andric __kmp_clear_x87_fpu_status_word(); 10790b57cec5SDimitry Andric __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word); 10800b57cec5SDimitry Andric } 10810b57cec5SDimitry Andric 10820b57cec5SDimitry Andric if (team->t.t_mxcsr != mxcsr) { 10830b57cec5SDimitry Andric __kmp_load_mxcsr(&team->t.t_mxcsr); 10840b57cec5SDimitry Andric } 10850b57cec5SDimitry Andric } 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric #else 10880b57cec5SDimitry Andric #define propagateFPControl(x) ((void)0) 10890b57cec5SDimitry Andric #define updateHWFPControl(x) ((void)0) 10900b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, 10930b57cec5SDimitry Andric int realloc); // forward declaration 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric /* Run a parallel region that has been serialized, so runs only in a team of the 1096fe6060f1SDimitry Andric single primary thread. 
*/ 10970b57cec5SDimitry Andric void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 10980b57cec5SDimitry Andric kmp_info_t *this_thr; 10990b57cec5SDimitry Andric kmp_team_t *serial_team; 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid)); 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric /* Skip all this code for autopar serialized loops since it results in 11040b57cec5SDimitry Andric unacceptable overhead */ 11050b57cec5SDimitry Andric if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 11060b57cec5SDimitry Andric return; 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 11090b57cec5SDimitry Andric __kmp_parallel_initialize(); 11100b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric this_thr = __kmp_threads[global_tid]; 11130b57cec5SDimitry Andric serial_team = this_thr->th.th_serial_team; 11140b57cec5SDimitry Andric 11150b57cec5SDimitry Andric /* utilize the serialized team held by this thread */ 11160b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team); 11170b57cec5SDimitry Andric KMP_MB(); 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 11200b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 11210b57cec5SDimitry Andric this_thr->th.th_task_team == 11220b57cec5SDimitry Andric this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); 11230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == 11240b57cec5SDimitry Andric NULL); 11250b57cec5SDimitry Andric KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / " 11260b57cec5SDimitry Andric "team %p, new task_team = NULL\n", 11270b57cec5SDimitry Andric global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 11280b57cec5SDimitry Andric this_thr->th.th_task_team = NULL; 11290b57cec5SDimitry Andric } 
11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; 11320b57cec5SDimitry Andric if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 11330b57cec5SDimitry Andric proc_bind = proc_bind_false; 11340b57cec5SDimitry Andric } else if (proc_bind == proc_bind_default) { 11350b57cec5SDimitry Andric // No proc_bind clause was specified, so use the current value 11360b57cec5SDimitry Andric // of proc-bind-var for this parallel region. 11370b57cec5SDimitry Andric proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; 11380b57cec5SDimitry Andric } 11390b57cec5SDimitry Andric // Reset for next parallel region 11400b57cec5SDimitry Andric this_thr->th.th_set_proc_bind = proc_bind_default; 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric #if OMPT_SUPPORT 11430b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 11440b57cec5SDimitry Andric void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 11450b57cec5SDimitry Andric if (ompt_enabled.enabled && 11460b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric ompt_task_info_t *parent_task_info; 11490b57cec5SDimitry Andric parent_task_info = OMPT_CUR_TASK_INFO(this_thr); 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 11520b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 11530b57cec5SDimitry Andric int team_size = 1; 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 11560b57cec5SDimitry Andric &(parent_task_info->task_data), &(parent_task_info->frame), 1157489b1cf2SDimitry Andric &ompt_parallel_data, team_size, 1158489b1cf2SDimitry Andric ompt_parallel_invoker_program | ompt_parallel_team, codeptr); 11590b57cec5SDimitry Andric } 11600b57cec5SDimitry Andric } 
11610b57cec5SDimitry Andric #endif // OMPT_SUPPORT 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric if (this_thr->th.th_team != serial_team) { 11640b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 11650b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric if (serial_team->t.t_serialized) { 11680b57cec5SDimitry Andric /* this serial team was already used 11690b57cec5SDimitry Andric TODO increase performance by making this locks more specific */ 11700b57cec5SDimitry Andric kmp_team_t *new_team; 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 11730b57cec5SDimitry Andric 11740b57cec5SDimitry Andric new_team = 11750b57cec5SDimitry Andric __kmp_allocate_team(this_thr->th.th_root, 1, 1, 11760b57cec5SDimitry Andric #if OMPT_SUPPORT 11770b57cec5SDimitry Andric ompt_parallel_data, 11780b57cec5SDimitry Andric #endif 11790b57cec5SDimitry Andric proc_bind, &this_thr->th.th_current_task->td_icvs, 11800b57cec5SDimitry Andric 0 USE_NESTED_HOT_ARG(NULL)); 11810b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 11820b57cec5SDimitry Andric KMP_ASSERT(new_team); 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric /* setup new serialized team and install it */ 11850b57cec5SDimitry Andric new_team->t.t_threads[0] = this_thr; 11860b57cec5SDimitry Andric new_team->t.t_parent = this_thr->th.th_team; 11870b57cec5SDimitry Andric serial_team = new_team; 11880b57cec5SDimitry Andric this_thr->th.th_serial_team = serial_team; 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric KF_TRACE( 11910b57cec5SDimitry Andric 10, 11920b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", 11930b57cec5SDimitry Andric global_tid, serial_team)); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric /* TODO the above breaks the requirement that if we run out of resources, 
11960b57cec5SDimitry Andric then we can still guarantee that serialized teams are ok, since we may 11970b57cec5SDimitry Andric need to allocate a new one */ 11980b57cec5SDimitry Andric } else { 11990b57cec5SDimitry Andric KF_TRACE( 12000b57cec5SDimitry Andric 10, 12010b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", 12020b57cec5SDimitry Andric global_tid, serial_team)); 12030b57cec5SDimitry Andric } 12040b57cec5SDimitry Andric 12050b57cec5SDimitry Andric /* we have to initialize this serial team */ 12060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 12070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 12080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team); 12090b57cec5SDimitry Andric serial_team->t.t_ident = loc; 12100b57cec5SDimitry Andric serial_team->t.t_serialized = 1; 12110b57cec5SDimitry Andric serial_team->t.t_nproc = 1; 12120b57cec5SDimitry Andric serial_team->t.t_parent = this_thr->th.th_team; 12130b57cec5SDimitry Andric serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched; 12140b57cec5SDimitry Andric this_thr->th.th_team = serial_team; 12150b57cec5SDimitry Andric serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; 12160b57cec5SDimitry Andric 1217349cc55cSDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid, 12180b57cec5SDimitry Andric this_thr->th.th_current_task)); 12190b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); 12200b57cec5SDimitry Andric this_thr->th.th_current_task->td_flags.executing = 0; 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric __kmp_push_current_task_to_thread(this_thr, serial_team, 0); 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric /* TODO: GEH: do ICVs work for nested serialized teams? 
Don't we need an 12250b57cec5SDimitry Andric implicit task for each serialized task represented by 12260b57cec5SDimitry Andric team->t.t_serialized? */ 12270b57cec5SDimitry Andric copy_icvs(&this_thr->th.th_current_task->td_icvs, 12280b57cec5SDimitry Andric &this_thr->th.th_current_task->td_parent->td_icvs); 12290b57cec5SDimitry Andric 12300b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 12310b57cec5SDimitry Andric // level 12320b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 12330b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 12340b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric if (__kmp_nested_proc_bind.used && 12380b57cec5SDimitry Andric (level + 1 < __kmp_nested_proc_bind.used)) { 12390b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.proc_bind = 12400b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[level + 1]; 12410b57cec5SDimitry Andric } 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric #if USE_DEBUGGER 12440b57cec5SDimitry Andric serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. 
12450b57cec5SDimitry Andric #endif 12460b57cec5SDimitry Andric this_thr->th.th_info.ds.ds_tid = 0; 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric /* set thread cache values */ 12490b57cec5SDimitry Andric this_thr->th.th_team_nproc = 1; 12500b57cec5SDimitry Andric this_thr->th.th_team_master = this_thr; 12510b57cec5SDimitry Andric this_thr->th.th_team_serialized = 1; 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; 12540b57cec5SDimitry Andric serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; 12550b57cec5SDimitry Andric serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save 12560b57cec5SDimitry Andric 12570b57cec5SDimitry Andric propagateFPControl(serial_team); 12580b57cec5SDimitry Andric 12590b57cec5SDimitry Andric /* check if we need to allocate dispatch buffers stack */ 12600b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 12610b57cec5SDimitry Andric if (!serial_team->t.t_dispatch->th_disp_buffer) { 12620b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = 12630b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 12640b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 12650b57cec5SDimitry Andric } 12660b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric KMP_MB(); 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric } else { 12710b57cec5SDimitry Andric /* this serialized team is already being used, 12720b57cec5SDimitry Andric * that's fine, just add another nested level */ 12730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 12740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 12750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 12760b57cec5SDimitry Andric ++serial_team->t.t_serialized; 12770b57cec5SDimitry Andric 
this_thr->th.th_team_serialized = serial_team->t.t_serialized; 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 12800b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 12810b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 12820b57cec5SDimitry Andric // level 12830b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 12840b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 12850b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 12860b57cec5SDimitry Andric } 12870b57cec5SDimitry Andric serial_team->t.t_level++; 12880b57cec5SDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " 12890b57cec5SDimitry Andric "of serial team %p to %d\n", 12900b57cec5SDimitry Andric global_tid, serial_team, serial_team->t.t_level)); 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric /* allocate/push dispatch buffers stack */ 12930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 12940b57cec5SDimitry Andric { 12950b57cec5SDimitry Andric dispatch_private_info_t *disp_buffer = 12960b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 12970b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 12980b57cec5SDimitry Andric disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; 12990b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 13020b57cec5SDimitry Andric 13030b57cec5SDimitry Andric KMP_MB(); 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); 13060b57cec5SDimitry Andric 13070b57cec5SDimitry Andric // Perform the display affinity functionality for 13080b57cec5SDimitry Andric // serialized parallel regions 
13090b57cec5SDimitry Andric if (__kmp_display_affinity) { 13100b57cec5SDimitry Andric if (this_thr->th.th_prev_level != serial_team->t.t_level || 13110b57cec5SDimitry Andric this_thr->th.th_prev_num_threads != 1) { 13120b57cec5SDimitry Andric // NULL means use the affinity-format-var ICV 13130b57cec5SDimitry Andric __kmp_aux_display_affinity(global_tid, NULL); 13140b57cec5SDimitry Andric this_thr->th.th_prev_level = serial_team->t.t_level; 13150b57cec5SDimitry Andric this_thr->th.th_prev_num_threads = 1; 13160b57cec5SDimitry Andric } 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric if (__kmp_env_consistency_check) 13200b57cec5SDimitry Andric __kmp_push_parallel(global_tid, NULL); 13210b57cec5SDimitry Andric #if OMPT_SUPPORT 13220b57cec5SDimitry Andric serial_team->t.ompt_team_info.master_return_address = codeptr; 13230b57cec5SDimitry Andric if (ompt_enabled.enabled && 13240b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 1325fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = 1326fe6060f1SDimitry Andric OMPT_GET_FRAME_ADDRESS(0); 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 13290b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, 13300b57cec5SDimitry Andric &ompt_parallel_data, codeptr); 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); 13330b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric /* OMPT implicit task begin */ 13360b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 13370b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 13380b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), 1339fe6060f1SDimitry Andric OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), 1340fe6060f1SDimitry Andric ompt_task_implicit); // TODO: Can this be ompt_task_initial? 1341fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->thread_num = 1342fe6060f1SDimitry Andric __kmp_tid_from_gtid(global_tid); 13430b57cec5SDimitry Andric } 13440b57cec5SDimitry Andric 13450b57cec5SDimitry Andric /* OMPT state */ 13460b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 1347fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = 1348fe6060f1SDimitry Andric OMPT_GET_FRAME_ADDRESS(0); 13490b57cec5SDimitry Andric } 13500b57cec5SDimitry Andric #endif 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric /* most of the work for a fork */ 13540b57cec5SDimitry Andric /* return true if we really went parallel, false if serialized */ 13550b57cec5SDimitry Andric int __kmp_fork_call(ident_t *loc, int gtid, 13560b57cec5SDimitry Andric enum fork_context_e call_context, // Intel, GNU, ... 
13570b57cec5SDimitry Andric kmp_int32 argc, microtask_t microtask, launch_t invoker, 135816794618SDimitry Andric kmp_va_list ap) { 13590b57cec5SDimitry Andric void **argv; 13600b57cec5SDimitry Andric int i; 13610b57cec5SDimitry Andric int master_tid; 13620b57cec5SDimitry Andric int master_this_cons; 13630b57cec5SDimitry Andric kmp_team_t *team; 13640b57cec5SDimitry Andric kmp_team_t *parent_team; 13650b57cec5SDimitry Andric kmp_info_t *master_th; 13660b57cec5SDimitry Andric kmp_root_t *root; 13670b57cec5SDimitry Andric int nthreads; 13680b57cec5SDimitry Andric int master_active; 13690b57cec5SDimitry Andric int master_set_numthreads; 13700b57cec5SDimitry Andric int level; 13710b57cec5SDimitry Andric int active_level; 13720b57cec5SDimitry Andric int teams_level; 13730b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 13740b57cec5SDimitry Andric kmp_hot_team_ptr_t **p_hot_teams; 13750b57cec5SDimitry Andric #endif 13760b57cec5SDimitry Andric { // KMP_TIME_BLOCK 13770b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 13780b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 13810b57cec5SDimitry Andric if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 13820b57cec5SDimitry Andric /* Some systems prefer the stack for the root thread(s) to start with */ 13830b57cec5SDimitry Andric /* some gap from the parent stack to prevent false sharing. 
*/ 13840b57cec5SDimitry Andric void *dummy = KMP_ALLOCA(__kmp_stkpadding); 13850b57cec5SDimitry Andric /* These 2 lines below are so this does not get optimized out */ 13860b57cec5SDimitry Andric if (__kmp_stkpadding > KMP_MAX_STKPADDING) 13870b57cec5SDimitry Andric __kmp_stkpadding += (short)((kmp_int64)dummy); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric /* initialize if needed */ 13910b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 13920b57cec5SDimitry Andric __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 13930b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 13940b57cec5SDimitry Andric __kmp_parallel_initialize(); 13950b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric /* setup current data */ 13980b57cec5SDimitry Andric master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with 13990b57cec5SDimitry Andric // shutdown 14000b57cec5SDimitry Andric parent_team = master_th->th.th_team; 14010b57cec5SDimitry Andric master_tid = master_th->th.th_info.ds.ds_tid; 14020b57cec5SDimitry Andric master_this_cons = master_th->th.th_local.this_construct; 14030b57cec5SDimitry Andric root = master_th->th.th_root; 14040b57cec5SDimitry Andric master_active = root->r.r_active; 14050b57cec5SDimitry Andric master_set_numthreads = master_th->th.th_set_nproc; 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric #if OMPT_SUPPORT 14080b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 14090b57cec5SDimitry Andric ompt_data_t *parent_task_data; 14100b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 14110b57cec5SDimitry Andric ompt_data_t *implicit_task_data; 14120b57cec5SDimitry Andric void *return_address = NULL; 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14150b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, 14160b57cec5SDimitry Andric NULL, 
NULL); 14170b57cec5SDimitry Andric return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric #endif 14200b57cec5SDimitry Andric 1421fe6060f1SDimitry Andric // Assign affinity to root thread if it hasn't happened yet 1422fe6060f1SDimitry Andric __kmp_assign_root_init_mask(); 1423fe6060f1SDimitry Andric 14240b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 14250b57cec5SDimitry Andric level = parent_team->t.t_level; 14260b57cec5SDimitry Andric // used to launch non-serial teams even if nested is not allowed 14270b57cec5SDimitry Andric active_level = parent_team->t.t_active_level; 14280b57cec5SDimitry Andric // needed to check nesting inside the teams 14290b57cec5SDimitry Andric teams_level = master_th->th.th_teams_level; 14300b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 14310b57cec5SDimitry Andric p_hot_teams = &master_th->th.th_hot_teams; 14320b57cec5SDimitry Andric if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 14330b57cec5SDimitry Andric *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 14340b57cec5SDimitry Andric sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 14350b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 14360b57cec5SDimitry Andric // it is either actual or not needed (when active_level > 0) 14370b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team_nth = 1; 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric #endif 14400b57cec5SDimitry Andric 14410b57cec5SDimitry Andric #if OMPT_SUPPORT 14420b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14430b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 14440b57cec5SDimitry Andric int team_size = master_set_numthreads 14450b57cec5SDimitry Andric ? 
master_set_numthreads 14460b57cec5SDimitry Andric : get__nproc_2(parent_team, master_tid); 1447489b1cf2SDimitry Andric int flags = OMPT_INVOKER(call_context) | 1448489b1cf2SDimitry Andric ((microtask == (microtask_t)__kmp_teams_master) 1449489b1cf2SDimitry Andric ? ompt_parallel_league 1450489b1cf2SDimitry Andric : ompt_parallel_team); 14510b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 1452489b1cf2SDimitry Andric parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, 1453489b1cf2SDimitry Andric return_address); 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 14560b57cec5SDimitry Andric } 14570b57cec5SDimitry Andric #endif 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric master_th->th.th_ident = loc; 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric if (master_th->th.th_teams_microtask && ap && 14620b57cec5SDimitry Andric microtask != (microtask_t)__kmp_teams_master && level == teams_level) { 14630b57cec5SDimitry Andric // AC: This is start of parallel that is nested inside teams construct. 14640b57cec5SDimitry Andric // The team is actual (hot), all workers are ready at the fork barrier. 14650b57cec5SDimitry Andric // No lock needed to initialize the team a bit, then free workers. 
14660b57cec5SDimitry Andric parent_team->t.t_ident = loc; 14670b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, parent_team, TRUE); 14680b57cec5SDimitry Andric parent_team->t.t_argc = argc; 14690b57cec5SDimitry Andric argv = (void **)parent_team->t.t_argv; 14700b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 147116794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 14720b57cec5SDimitry Andric // Increment our nested depth levels, but not increase the serialization 14730b57cec5SDimitry Andric if (parent_team == master_th->th.th_serial_team) { 14740b57cec5SDimitry Andric // AC: we are in serialized parallel 14750b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 14760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1477489b1cf2SDimitry Andric 1478e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) { 1479e8d8bef9SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1480e8d8bef9SDimitry Andric // correctly, will restore at join time 1481e8d8bef9SDimitry Andric parent_team->t.t_serialized--; 1482e8d8bef9SDimitry Andric return TRUE; 1483e8d8bef9SDimitry Andric } 1484e8d8bef9SDimitry Andric 1485fe6060f1SDimitry Andric #if OMPD_SUPPORT 1486fe6060f1SDimitry Andric parent_team->t.t_pkfn = microtask; 1487fe6060f1SDimitry Andric #endif 1488fe6060f1SDimitry Andric 14890b57cec5SDimitry Andric #if OMPT_SUPPORT 14900b57cec5SDimitry Andric void *dummy; 1491489b1cf2SDimitry Andric void **exit_frame_p; 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14960b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 14970b57cec5SDimitry Andric &ompt_parallel_data, return_address); 1498489b1cf2SDimitry Andric exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric 
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 15010b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric /* OMPT implicit task begin */ 15040b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 15050b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1506fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1507fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1508489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1509489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1510489b1cf2SDimitry Andric implicit_task_data, 1, 1511489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15120b57cec5SDimitry Andric } 15130b57cec5SDimitry Andric 15140b57cec5SDimitry Andric /* OMPT state */ 15150b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 15160b57cec5SDimitry Andric } else { 1517489b1cf2SDimitry Andric exit_frame_p = &dummy; 15180b57cec5SDimitry Andric } 15190b57cec5SDimitry Andric #endif 1520489b1cf2SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1521489b1cf2SDimitry Andric // correctly, will restore at join time 1522489b1cf2SDimitry Andric parent_team->t.t_serialized--; 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric { 15250b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 15260b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 15270b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 15280b57cec5SDimitry Andric #if OMPT_SUPPORT 15290b57cec5SDimitry Andric , 1530489b1cf2SDimitry Andric exit_frame_p 15310b57cec5SDimitry Andric #endif 15320b57cec5SDimitry Andric ); 15330b57cec5SDimitry Andric } 15340b57cec5SDimitry Andric 15350b57cec5SDimitry Andric #if OMPT_SUPPORT 15360b57cec5SDimitry Andric 
if (ompt_enabled.enabled) { 1537489b1cf2SDimitry Andric *exit_frame_p = NULL; 15380b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; 15390b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 15400b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 15410b57cec5SDimitry Andric ompt_scope_end, NULL, implicit_task_data, 1, 1542489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15430b57cec5SDimitry Andric } 1544489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 15450b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 15460b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 15470b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1548489b1cf2SDimitry Andric &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), 1549489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1550489b1cf2SDimitry Andric return_address); 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 15530b57cec5SDimitry Andric } 15540b57cec5SDimitry Andric #endif 15550b57cec5SDimitry Andric return TRUE; 15560b57cec5SDimitry Andric } 15570b57cec5SDimitry Andric 15580b57cec5SDimitry Andric parent_team->t.t_pkfn = microtask; 15590b57cec5SDimitry Andric parent_team->t.t_invoke = invoker; 15600b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 15610b57cec5SDimitry Andric parent_team->t.t_active_level++; 15620b57cec5SDimitry Andric parent_team->t.t_level++; 15630b57cec5SDimitry Andric parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save 15640b57cec5SDimitry Andric 1565489b1cf2SDimitry Andric #if OMPT_SUPPORT 1566489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1567489b1cf2SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1568489b1cf2SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, 
gtid, 1569489b1cf2SDimitry Andric &ompt_parallel_data, return_address); 1570489b1cf2SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); 1571489b1cf2SDimitry Andric } 1572489b1cf2SDimitry Andric #endif 1573489b1cf2SDimitry Andric 15740b57cec5SDimitry Andric /* Change number of threads in the team if requested */ 15750b57cec5SDimitry Andric if (master_set_numthreads) { // The parallel has num_threads clause 1576349cc55cSDimitry Andric if (master_set_numthreads <= master_th->th.th_teams_size.nth) { 15770b57cec5SDimitry Andric // AC: only can reduce number of threads dynamically, can't increase 15780b57cec5SDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1579349cc55cSDimitry Andric // NOTE: if using distributed barrier, we need to run this code block 1580349cc55cSDimitry Andric // even when the team size appears not to have changed from the max. 1581349cc55cSDimitry Andric int old_proc = master_th->th.th_teams_size.nth; 1582349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == 1583349cc55cSDimitry Andric bp_dist_bar) { 1584349cc55cSDimitry Andric __kmp_resize_dist_barrier(parent_team, old_proc, 1585349cc55cSDimitry Andric master_set_numthreads); 1586349cc55cSDimitry Andric __kmp_add_threads_to_team(parent_team, master_set_numthreads); 1587349cc55cSDimitry Andric } 15880b57cec5SDimitry Andric parent_team->t.t_nproc = master_set_numthreads; 15890b57cec5SDimitry Andric for (i = 0; i < master_set_numthreads; ++i) { 15900b57cec5SDimitry Andric other_threads[i]->th.th_team_nproc = master_set_numthreads; 15910b57cec5SDimitry Andric } 15920b57cec5SDimitry Andric } 1593349cc55cSDimitry Andric // Keep extra threads hot in the team for possible next parallels 15940b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 15950b57cec5SDimitry Andric } 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric #if USE_DEBUGGER 15980b57cec5SDimitry Andric if (__kmp_debugging) { // Let debugger override number of 
threads. 15990b57cec5SDimitry Andric int nth = __kmp_omp_num_threads(loc); 16000b57cec5SDimitry Andric if (nth > 0) { // 0 means debugger doesn't want to change num threads 16010b57cec5SDimitry Andric master_set_numthreads = nth; 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric } 16040b57cec5SDimitry Andric #endif 16050b57cec5SDimitry Andric 1606349cc55cSDimitry Andric // Figure out the proc_bind policy for the nested parallel within teams 1607349cc55cSDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1608349cc55cSDimitry Andric // proc_bind_default means don't update 1609349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 1610349cc55cSDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1611349cc55cSDimitry Andric proc_bind = proc_bind_false; 1612349cc55cSDimitry Andric } else { 1613349cc55cSDimitry Andric // No proc_bind clause specified; use current proc-bind-var 1614349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 1615349cc55cSDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1616349cc55cSDimitry Andric } 1617349cc55cSDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel 1618349cc55cSDimitry Andric clause. 1619349cc55cSDimitry Andric This overrides proc-bind-var for this parallel region, but does not 1620349cc55cSDimitry Andric change proc-bind-var. */ 1621349cc55cSDimitry Andric // Figure the value of proc-bind-var for the child threads. 
1622349cc55cSDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 1623349cc55cSDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 1624349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 1625349cc55cSDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1626349cc55cSDimitry Andric } 1627349cc55cSDimitry Andric } 1628349cc55cSDimitry Andric KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind); 1629349cc55cSDimitry Andric // Need to change the bind-var ICV to correct value for each implicit task 1630349cc55cSDimitry Andric if (proc_bind_icv != proc_bind_default && 1631349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) { 1632349cc55cSDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1633349cc55cSDimitry Andric for (i = 0; i < master_th->th.th_team_nproc; ++i) { 1634349cc55cSDimitry Andric other_threads[i]->th.th_current_task->td_icvs.proc_bind = 1635349cc55cSDimitry Andric proc_bind_icv; 1636349cc55cSDimitry Andric } 1637349cc55cSDimitry Andric } 1638349cc55cSDimitry Andric // Reset for next parallel region 1639349cc55cSDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 1640349cc55cSDimitry Andric 1641e8d8bef9SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 1642e8d8bef9SDimitry Andric if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) || 1643e8d8bef9SDimitry Andric KMP_ITT_DEBUG) && 1644e8d8bef9SDimitry Andric __kmp_forkjoin_frames_mode == 3 && 1645e8d8bef9SDimitry Andric parent_team->t.t_active_level == 1 // only report frames at level 1 1646e8d8bef9SDimitry Andric && master_th->th.th_teams_size.nteams == 1) { 1647e8d8bef9SDimitry Andric kmp_uint64 tmp_time = __itt_get_timestamp(); 1648e8d8bef9SDimitry Andric master_th->th.th_frame_time = tmp_time; 1649e8d8bef9SDimitry Andric parent_team->t.t_region_time = tmp_time; 1650e8d8bef9SDimitry Andric } 1651e8d8bef9SDimitry Andric if (__itt_stack_caller_create_ptr) 
{ 1652fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 1653e8d8bef9SDimitry Andric // create new stack stitching id before entering fork barrier 1654e8d8bef9SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 1655e8d8bef9SDimitry Andric } 1656e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ 1657349cc55cSDimitry Andric #if KMP_AFFINITY_SUPPORTED 1658349cc55cSDimitry Andric __kmp_partition_places(parent_team); 1659349cc55cSDimitry Andric #endif 1660e8d8bef9SDimitry Andric 16610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " 16620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16630b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16640b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, parent_team); 16650b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " 16660b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16670b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16680b57cec5SDimitry Andric 1669e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) 1670e8d8bef9SDimitry Andric return TRUE; 1671e8d8bef9SDimitry Andric 1672fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 16730b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 16740b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16750b57cec5SDimitry Andric 16760b57cec5SDimitry Andric if (!parent_team->t.t_invoke(gtid)) { 1677fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 16780b57cec5SDimitry Andric } 16790b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 16800b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16810b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 16840b57cec5SDimitry Andric 16850b57cec5SDimitry Andric return TRUE; 16860b57cec5SDimitry Andric } // Parallel closely nested in teams construct 16870b57cec5SDimitry Andric 16880b57cec5SDimitry Andric #if KMP_DEBUG 16890b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 16900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 16910b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric #endif 16940b57cec5SDimitry Andric 1695349cc55cSDimitry Andric // Need this to happen before we determine the number of threads, not while 1696349cc55cSDimitry Andric // we are allocating the team 1697349cc55cSDimitry Andric //__kmp_push_current_task_to_thread(master_th, parent_team, 0); 1698fe6060f1SDimitry Andric int enter_teams = 0; 16990b57cec5SDimitry Andric if (parent_team->t.t_active_level >= 17000b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels) { 17010b57cec5SDimitry Andric nthreads = 1; 17020b57cec5SDimitry Andric } else { 1703fe6060f1SDimitry Andric enter_teams = ((ap == NULL && active_level == 0) || 17040b57cec5SDimitry Andric (ap && teams_level > 0 && teams_level == level)); 1705349cc55cSDimitry Andric nthreads = master_set_numthreads 17060b57cec5SDimitry Andric ? master_set_numthreads 1707349cc55cSDimitry Andric // TODO: get nproc directly from current task 1708349cc55cSDimitry Andric : get__nproc_2(parent_team, master_tid); 17090b57cec5SDimitry Andric // Check if we need to take forkjoin lock? (no need for serialized 17100b57cec5SDimitry Andric // parallel out of teams construct). This code moved here from 17110b57cec5SDimitry Andric // __kmp_reserve_threads() to speedup nested serialized parallels. 
17120b57cec5SDimitry Andric if (nthreads > 1) { 17130b57cec5SDimitry Andric if ((get__max_active_levels(master_th) == 1 && 17140b57cec5SDimitry Andric (root->r.r_in_parallel && !enter_teams)) || 17150b57cec5SDimitry Andric (__kmp_library == library_serial)) { 17160b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" 17170b57cec5SDimitry Andric " threads\n", 17180b57cec5SDimitry Andric gtid, nthreads)); 17190b57cec5SDimitry Andric nthreads = 1; 17200b57cec5SDimitry Andric } 17210b57cec5SDimitry Andric } 17220b57cec5SDimitry Andric if (nthreads > 1) { 17230b57cec5SDimitry Andric /* determine how many new threads we can use */ 17240b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 17250b57cec5SDimitry Andric /* AC: If we execute teams from parallel region (on host), then teams 17260b57cec5SDimitry Andric should be created but each can only have 1 thread if nesting is 17270b57cec5SDimitry Andric disabled. If teams called from serial region, then teams and their 17280b57cec5SDimitry Andric threads should be created regardless of the nesting setting. 
*/ 17290b57cec5SDimitry Andric nthreads = __kmp_reserve_threads(root, parent_team, master_tid, 17300b57cec5SDimitry Andric nthreads, enter_teams); 17310b57cec5SDimitry Andric if (nthreads == 1) { 17320b57cec5SDimitry Andric // Free lock for single thread execution here; for multi-thread 17330b57cec5SDimitry Andric // execution it will be freed later after team of threads created 17340b57cec5SDimitry Andric // and initialized 17350b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 17360b57cec5SDimitry Andric } 17370b57cec5SDimitry Andric } 17380b57cec5SDimitry Andric } 17390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nthreads > 0); 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // If we temporarily changed the set number of threads then restore it now 17420b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric /* create a serialized parallel region? */ 17450b57cec5SDimitry Andric if (nthreads == 1) { 17460b57cec5SDimitry Andric /* josh todo: hypothetical question: what do we do for OS X*? 
*/ 17470b57cec5SDimitry Andric #if KMP_OS_LINUX && \ 17480b57cec5SDimitry Andric (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 17490b57cec5SDimitry Andric void *args[argc]; 17500b57cec5SDimitry Andric #else 17510b57cec5SDimitry Andric void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 17520b57cec5SDimitry Andric #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 17530b57cec5SDimitry Andric KMP_ARCH_AARCH64) */ 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric KA_TRACE(20, 17560b57cec5SDimitry Andric ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 17570b57cec5SDimitry Andric 17580b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 17590b57cec5SDimitry Andric 1760fe6060f1SDimitry Andric #if OMPD_SUPPORT 1761fe6060f1SDimitry Andric master_th->th.th_serial_team->t.t_pkfn = microtask; 1762fe6060f1SDimitry Andric #endif 1763fe6060f1SDimitry Andric 17640b57cec5SDimitry Andric if (call_context == fork_context_intel) { 17650b57cec5SDimitry Andric /* TODO this sucks, use the compiler itself to pass args! 
:) */ 17660b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_ident = loc; 17670b57cec5SDimitry Andric if (!ap) { 17680b57cec5SDimitry Andric // revert change made in __kmpc_serialized_parallel() 17690b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_level--; 17700b57cec5SDimitry Andric // Get args from parent team for teams construct 17710b57cec5SDimitry Andric 17720b57cec5SDimitry Andric #if OMPT_SUPPORT 17730b57cec5SDimitry Andric void *dummy; 1774489b1cf2SDimitry Andric void **exit_frame_p; 17750b57cec5SDimitry Andric ompt_task_info_t *task_info; 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric if (ompt_enabled.enabled) { 17800b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 17810b57cec5SDimitry Andric &ompt_parallel_data, return_address); 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 17840b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 17850b57cec5SDimitry Andric 17860b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1787489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 17880b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1789fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1790fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1791489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1792489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1793489b1cf2SDimitry Andric &(task_info->task_data), 1, 1794489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1795489b1cf2SDimitry Andric ompt_task_implicit); 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric /* OMPT state */ 17990b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 18000b57cec5SDimitry Andric } else { 1801489b1cf2SDimitry Andric exit_frame_p = &dummy; 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric #endif 18040b57cec5SDimitry Andric 18050b57cec5SDimitry Andric { 18060b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 18070b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 18080b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, 18090b57cec5SDimitry Andric parent_team->t.t_argv 18100b57cec5SDimitry Andric #if OMPT_SUPPORT 18110b57cec5SDimitry Andric , 1812489b1cf2SDimitry Andric exit_frame_p 18130b57cec5SDimitry Andric #endif 18140b57cec5SDimitry Andric ); 18150b57cec5SDimitry Andric } 18160b57cec5SDimitry Andric 18170b57cec5SDimitry Andric #if OMPT_SUPPORT 18180b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1819489b1cf2SDimitry Andric *exit_frame_p = NULL; 18200b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 18210b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 18220b57cec5SDimitry Andric 
ompt_scope_end, NULL, &(task_info->task_data), 1, 1823489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1824489b1cf2SDimitry Andric ompt_task_implicit); 18250b57cec5SDimitry Andric } 1826489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 18270b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 18280b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 18290b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1830489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1831489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1832489b1cf2SDimitry Andric return_address); 18330b57cec5SDimitry Andric } 18340b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric #endif 18370b57cec5SDimitry Andric } else if (microtask == (microtask_t)__kmp_teams_master) { 18380b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_team == 18390b57cec5SDimitry Andric master_th->th.th_serial_team); 18400b57cec5SDimitry Andric team = master_th->th.th_team; 18410b57cec5SDimitry Andric // team->t.t_pkfn = microtask; 18420b57cec5SDimitry Andric team->t.t_invoke = invoker; 18430b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 18440b57cec5SDimitry Andric team->t.t_argc = argc; 18450b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 18460b57cec5SDimitry Andric if (ap) { 18470b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 184816794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18490b57cec5SDimitry Andric } else { 18500b57cec5SDimitry Andric for (i = 0; i < argc; ++i) 18510b57cec5SDimitry Andric // Get args from parent team for teams construct 18520b57cec5SDimitry Andric argv[i] = parent_team->t.t_argv[i]; 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric // AC: revert change made in __kmpc_serialized_parallel() 18550b57cec5SDimitry Andric 
// because initial code in teams should have level=0 18560b57cec5SDimitry Andric team->t.t_level--; 18570b57cec5SDimitry Andric // AC: call special invoker for outer "parallel" of teams construct 18580b57cec5SDimitry Andric invoker(gtid); 1859489b1cf2SDimitry Andric #if OMPT_SUPPORT 1860489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1861489b1cf2SDimitry Andric ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); 1862489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1863489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1864489b1cf2SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 0, 1865489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); 1866489b1cf2SDimitry Andric } 1867489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 1868489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1869489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1870489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_league, 1871489b1cf2SDimitry Andric return_address); 1872489b1cf2SDimitry Andric } 1873489b1cf2SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 1874489b1cf2SDimitry Andric } 1875489b1cf2SDimitry Andric #endif 18760b57cec5SDimitry Andric } else { 18770b57cec5SDimitry Andric argv = args; 18780b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 187916794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18800b57cec5SDimitry Andric KMP_MB(); 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric #if OMPT_SUPPORT 18830b57cec5SDimitry Andric void *dummy; 1884489b1cf2SDimitry Andric void **exit_frame_p; 18850b57cec5SDimitry Andric ompt_task_info_t *task_info; 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 18880b57cec5SDimitry Andric 18890b57cec5SDimitry Andric if (ompt_enabled.enabled) { 18900b57cec5SDimitry Andric 
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 18910b57cec5SDimitry Andric &ompt_parallel_data, return_address); 18920b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 18930b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 18940b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1895489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 18960b57cec5SDimitry Andric 18970b57cec5SDimitry Andric /* OMPT implicit task begin */ 18980b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 18990b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19000b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19010b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1902489b1cf2SDimitry Andric implicit_task_data, 1, __kmp_tid_from_gtid(gtid), 1903489b1cf2SDimitry Andric ompt_task_implicit); 1904fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1905fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric /* OMPT state */ 19090b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 19100b57cec5SDimitry Andric } else { 1911489b1cf2SDimitry Andric exit_frame_p = &dummy; 19120b57cec5SDimitry Andric } 19130b57cec5SDimitry Andric #endif 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric { 19160b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 19170b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 19180b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, args 19190b57cec5SDimitry Andric #if OMPT_SUPPORT 19200b57cec5SDimitry Andric , 1921489b1cf2SDimitry Andric exit_frame_p 19220b57cec5SDimitry Andric #endif 19230b57cec5SDimitry Andric ); 19240b57cec5SDimitry Andric } 19250b57cec5SDimitry Andric 19260b57cec5SDimitry Andric 
#if OMPT_SUPPORT 19270b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1928489b1cf2SDimitry Andric *exit_frame_p = NULL; 19290b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19300b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19310b57cec5SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 1, 1932489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1933489b1cf2SDimitry Andric ompt_task_implicit); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 19370b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 19380b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 19390b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 19400b57cec5SDimitry Andric &ompt_parallel_data, parent_task_data, 1941489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1942489b1cf2SDimitry Andric return_address); 19430b57cec5SDimitry Andric } 19440b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric #endif 19470b57cec5SDimitry Andric } 19480b57cec5SDimitry Andric } else if (call_context == fork_context_gnu) { 19490b57cec5SDimitry Andric #if OMPT_SUPPORT 19500b57cec5SDimitry Andric ompt_lw_taskteam_t lwt; 19510b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, 19520b57cec5SDimitry Andric return_address); 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric lwt.ompt_task_info.frame.exit_frame = ompt_data_none; 19550b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lwt, master_th, 1); 19560b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 19570b57cec5SDimitry Andric #endif 19580b57cec5SDimitry Andric 19590b57cec5SDimitry Andric // we were called from GNU native code 19600b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19610b57cec5SDimitry Andric return FALSE; 19620b57cec5SDimitry Andric } else { 19630b57cec5SDimitry Andric KMP_ASSERT2(call_context < fork_context_last, 19640b57cec5SDimitry Andric "__kmp_fork_call: unknown fork_context parameter"); 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19680b57cec5SDimitry Andric KMP_MB(); 19690b57cec5SDimitry Andric return FALSE; 19700b57cec5SDimitry Andric } // if (nthreads == 1) 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric // GEH: only modify the executing flag in the case when not serialized 19730b57cec5SDimitry Andric // serialized case is handled in kmpc_serialized_parallel 19740b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 19750b57cec5SDimitry Andric "curtask=%p, curtask_max_aclevel=%d\n", 19760b57cec5SDimitry Andric parent_team->t.t_active_level, master_th, 19770b57cec5SDimitry Andric master_th->th.th_current_task, 19780b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)); 19790b57cec5SDimitry Andric // TODO: GEH - cannot do this assertion because root thread not set up as 19800b57cec5SDimitry Andric // executing 19810b57cec5SDimitry Andric // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 19820b57cec5SDimitry Andric master_th->th.th_current_task->td_flags.executing = 0; 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 19850b57cec5SDimitry Andric /* Increment our nested depth level */ 19860b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 19870b57cec5SDimitry Andric } 19880b57cec5SDimitry Andric 
19890b57cec5SDimitry Andric // See if we need to make a copy of the ICVs. 19900b57cec5SDimitry Andric int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 19910b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_nth.used) && 19920b57cec5SDimitry Andric (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 19930b57cec5SDimitry Andric nthreads_icv = __kmp_nested_nth.nth[level + 1]; 19940b57cec5SDimitry Andric } else { 19950b57cec5SDimitry Andric nthreads_icv = 0; // don't update 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric // Figure out the proc_bind_policy for the new team. 19990b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 2000349cc55cSDimitry Andric // proc_bind_default means don't update 2001349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 20020b57cec5SDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 20030b57cec5SDimitry Andric proc_bind = proc_bind_false; 20040b57cec5SDimitry Andric } else { 20050b57cec5SDimitry Andric // No proc_bind clause specified; use current proc-bind-var for this 20060b57cec5SDimitry Andric // parallel region 2007349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 20080b57cec5SDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 20090b57cec5SDimitry Andric } 2010349cc55cSDimitry Andric // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND 2011349cc55cSDimitry Andric if (master_th->th.th_teams_microtask && 2012349cc55cSDimitry Andric microtask == (microtask_t)__kmp_teams_master) { 2013349cc55cSDimitry Andric proc_bind = __kmp_teams_proc_bind; 2014349cc55cSDimitry Andric } 20150b57cec5SDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 20160b57cec5SDimitry Andric This overrides proc-bind-var for this parallel region, but does not 20170b57cec5SDimitry Andric change proc-bind-var. 
*/ 20180b57cec5SDimitry Andric // Figure the value of proc-bind-var for the child threads. 20190b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 20200b57cec5SDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 20210b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 2022349cc55cSDimitry Andric // Do not modify the proc bind icv for the two teams construct forks 2023349cc55cSDimitry Andric // They just let the proc bind icv pass through 2024349cc55cSDimitry Andric if (!master_th->th.th_teams_microtask || 2025349cc55cSDimitry Andric !(microtask == (microtask_t)__kmp_teams_master || ap == NULL)) 20260b57cec5SDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric } 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric // Reset for next parallel region 20310b57cec5SDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) { 20340b57cec5SDimitry Andric kmp_internal_control_t new_icvs; 20350b57cec5SDimitry Andric copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 20360b57cec5SDimitry Andric new_icvs.next = NULL; 20370b57cec5SDimitry Andric if (nthreads_icv > 0) { 20380b57cec5SDimitry Andric new_icvs.nproc = nthreads_icv; 20390b57cec5SDimitry Andric } 20400b57cec5SDimitry Andric if (proc_bind_icv != proc_bind_default) { 20410b57cec5SDimitry Andric new_icvs.proc_bind = proc_bind_icv; 20420b57cec5SDimitry Andric } 20430b57cec5SDimitry Andric 20440b57cec5SDimitry Andric /* allocate a new parallel team */ 20450b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20460b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20470b57cec5SDimitry Andric #if OMPT_SUPPORT 20480b57cec5SDimitry Andric ompt_parallel_data, 20490b57cec5SDimitry Andric 
#endif 20500b57cec5SDimitry Andric proc_bind, &new_icvs, 20510b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2052349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2053349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); 20540b57cec5SDimitry Andric } else { 20550b57cec5SDimitry Andric /* allocate a new parallel team */ 20560b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20570b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20580b57cec5SDimitry Andric #if OMPT_SUPPORT 20590b57cec5SDimitry Andric ompt_parallel_data, 20600b57cec5SDimitry Andric #endif 20610b57cec5SDimitry Andric proc_bind, 20620b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, 20630b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2064349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2065349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, 2066349cc55cSDimitry Andric &master_th->th.th_current_task->td_icvs); 20670b57cec5SDimitry Andric } 20680b57cec5SDimitry Andric KF_TRACE( 20690b57cec5SDimitry Andric 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 20700b57cec5SDimitry Andric 20710b57cec5SDimitry Andric /* setup the new team */ 20720b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 20730b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 20740b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 20750b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 20760b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 20770b57cec5SDimitry Andric #if OMPT_SUPPORT 20780b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, 20790b57cec5SDimitry Andric return_address); 
20800b57cec5SDimitry Andric #endif 20810b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 20820b57cec5SDimitry Andric // TODO: parent_team->t.t_level == INT_MAX ??? 20830b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 20840b57cec5SDimitry Andric int new_level = parent_team->t.t_level + 1; 20850b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20860b57cec5SDimitry Andric new_level = parent_team->t.t_active_level + 1; 20870b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20880b57cec5SDimitry Andric } else { 20890b57cec5SDimitry Andric // AC: Do not increase parallel level at start of the teams construct 20900b57cec5SDimitry Andric int new_level = parent_team->t.t_level; 20910b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20920b57cec5SDimitry Andric new_level = parent_team->t.t_active_level; 20930b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2096fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 20970b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 21000b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); 21010b57cec5SDimitry Andric 21020b57cec5SDimitry Andric // Update the floating point rounding in the team if required. 
21030b57cec5SDimitry Andric propagateFPControl(team); 2104fe6060f1SDimitry Andric #if OMPD_SUPPORT 2105fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 2106fe6060f1SDimitry Andric ompd_bp_parallel_begin(); 2107fe6060f1SDimitry Andric #endif 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 2110fe6060f1SDimitry Andric // Set primary thread's task team to team's task team. Unless this is hot 2111fe6060f1SDimitry Andric // team, it should be NULL. 21120b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 21130b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 2114fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team " 21150b57cec5SDimitry Andric "%p, new task_team %p / team %p\n", 21160b57cec5SDimitry Andric __kmp_gtid_from_thread(master_th), 21170b57cec5SDimitry Andric master_th->th.th_task_team, parent_team, 21180b57cec5SDimitry Andric team->t.t_task_team[master_th->th.th_task_state], team)); 21190b57cec5SDimitry Andric 21200b57cec5SDimitry Andric if (active_level || master_th->th.th_task_team) { 2121fe6060f1SDimitry Andric // Take a memo of primary thread's task_state 21220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 21230b57cec5SDimitry Andric if (master_th->th.th_task_state_top >= 21240b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz) { // increase size 21250b57cec5SDimitry Andric kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 21260b57cec5SDimitry Andric kmp_uint8 *old_stack, *new_stack; 21270b57cec5SDimitry Andric kmp_uint32 i; 21280b57cec5SDimitry Andric new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 21290b57cec5SDimitry Andric for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 21300b57cec5SDimitry Andric new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric for (i = 
master_th->th.th_task_state_stack_sz; i < new_size; 21330b57cec5SDimitry Andric ++i) { // zero-init rest of stack 21340b57cec5SDimitry Andric new_stack[i] = 0; 21350b57cec5SDimitry Andric } 21360b57cec5SDimitry Andric old_stack = master_th->th.th_task_state_memo_stack; 21370b57cec5SDimitry Andric master_th->th.th_task_state_memo_stack = new_stack; 21380b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz = new_size; 21390b57cec5SDimitry Andric __kmp_free(old_stack); 21400b57cec5SDimitry Andric } 2141fe6060f1SDimitry Andric // Store primary thread's task_state on stack 21420b57cec5SDimitry Andric master_th->th 21430b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top] = 21440b57cec5SDimitry Andric master_th->th.th_task_state; 21450b57cec5SDimitry Andric master_th->th.th_task_state_top++; 21460b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21470b57cec5SDimitry Andric if (master_th->th.th_hot_teams && 21480b57cec5SDimitry Andric active_level < __kmp_hot_teams_max_level && 21490b57cec5SDimitry Andric team == master_th->th.th_hot_teams[active_level].hot_team) { 2150fe6060f1SDimitry Andric // Restore primary thread's nested state if nested hot team 21510b57cec5SDimitry Andric master_th->th.th_task_state = 21520b57cec5SDimitry Andric master_th->th 21530b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top]; 21540b57cec5SDimitry Andric } else { 21550b57cec5SDimitry Andric #endif 21560b57cec5SDimitry Andric master_th->th.th_task_state = 0; 21570b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21580b57cec5SDimitry Andric } 21590b57cec5SDimitry Andric #endif 21600b57cec5SDimitry Andric } 21610b57cec5SDimitry Andric #if !KMP_NESTED_HOT_TEAMS 21620b57cec5SDimitry Andric KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 21630b57cec5SDimitry Andric (team == root->r.r_hot_team)); 21640b57cec5SDimitry Andric #endif 21650b57cec5SDimitry Andric } 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric KA_TRACE( 
21680b57cec5SDimitry Andric 20, 21690b57cec5SDimitry Andric ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 21700b57cec5SDimitry Andric gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 21710b57cec5SDimitry Andric team->t.t_nproc)); 21720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 21730b57cec5SDimitry Andric (team->t.t_master_tid == 0 && 21740b57cec5SDimitry Andric (team->t.t_parent == root->r.r_root_team || 21750b57cec5SDimitry Andric team->t.t_parent->t.t_serialized))); 21760b57cec5SDimitry Andric KMP_MB(); 21770b57cec5SDimitry Andric 21780b57cec5SDimitry Andric /* now, setup the arguments */ 21790b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 21800b57cec5SDimitry Andric if (ap) { 21810b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) { 218216794618SDimitry Andric void *new_argv = va_arg(kmp_va_deref(ap), void *); 21830b57cec5SDimitry Andric KMP_CHECK_UPDATE(*argv, new_argv); 21840b57cec5SDimitry Andric argv++; 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry Andric } else { 21870b57cec5SDimitry Andric for (i = 0; i < argc; ++i) { 21880b57cec5SDimitry Andric // Get args from parent team for teams construct 21890b57cec5SDimitry Andric KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 21900b57cec5SDimitry Andric } 21910b57cec5SDimitry Andric } 21920b57cec5SDimitry Andric 21930b57cec5SDimitry Andric /* now actually fork the threads */ 21940b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 21950b57cec5SDimitry Andric if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 21960b57cec5SDimitry Andric root->r.r_active = TRUE; 21970b57cec5SDimitry Andric 2198349cc55cSDimitry Andric __kmp_fork_team_threads(root, team, master_th, gtid, !ap); 21990b57cec5SDimitry Andric __kmp_setup_icv_copy(team, nthreads, 22000b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, loc); 22010b57cec5SDimitry Andric 22020b57cec5SDimitry Andric #if 
OMPT_SUPPORT 22030b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 22040b57cec5SDimitry Andric #endif 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric #if USE_ITT_BUILD 22090b57cec5SDimitry Andric if (team->t.t_active_level == 1 // only report frames at level 1 22100b57cec5SDimitry Andric && !master_th->th.th_teams_microtask) { // not in teams construct 22110b57cec5SDimitry Andric #if USE_ITT_NOTIFY 22120b57cec5SDimitry Andric if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 22130b57cec5SDimitry Andric (__kmp_forkjoin_frames_mode == 3 || 22140b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 1)) { 22150b57cec5SDimitry Andric kmp_uint64 tmp_time = 0; 22160b57cec5SDimitry Andric if (__itt_get_timestamp_ptr) 22170b57cec5SDimitry Andric tmp_time = __itt_get_timestamp(); 22180b57cec5SDimitry Andric // Internal fork - report frame begin 22190b57cec5SDimitry Andric master_th->th.th_frame_time = tmp_time; 22200b57cec5SDimitry Andric if (__kmp_forkjoin_frames_mode == 3) 22210b57cec5SDimitry Andric team->t.t_region_time = tmp_time; 22220b57cec5SDimitry Andric } else 22230b57cec5SDimitry Andric // only one notification scheme (either "submit" or "forking/joined", not both) 22240b57cec5SDimitry Andric #endif /* USE_ITT_NOTIFY */ 22250b57cec5SDimitry Andric if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 22260b57cec5SDimitry Andric __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 22270b57cec5SDimitry Andric // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
22280b57cec5SDimitry Andric __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 22290b57cec5SDimitry Andric } 22300b57cec5SDimitry Andric } 22310b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22320b57cec5SDimitry Andric 22330b57cec5SDimitry Andric /* now go on and do the work */ 22340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 22350b57cec5SDimitry Andric KMP_MB(); 22360b57cec5SDimitry Andric KF_TRACE(10, 22370b57cec5SDimitry Andric ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 22380b57cec5SDimitry Andric root, team, master_th, gtid)); 22390b57cec5SDimitry Andric 22400b57cec5SDimitry Andric #if USE_ITT_BUILD 22410b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 2242fe6060f1SDimitry Andric // create new stack stitching id before entering fork barrier 2243fe6060f1SDimitry Andric if (!enter_teams) { 2244fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL); 2245fe6060f1SDimitry Andric team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2246fe6060f1SDimitry Andric } else if (parent_team->t.t_serialized) { 2247fe6060f1SDimitry Andric // keep stack stitching id in the serialized parent_team; 2248fe6060f1SDimitry Andric // current team will be used for parallel inside the teams; 2249fe6060f1SDimitry Andric // if parent_team is active, then it already keeps stack stitching id 2250fe6060f1SDimitry Andric // for the league of teams 2251fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 2252fe6060f1SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2253fe6060f1SDimitry Andric } 22540b57cec5SDimitry Andric } 22550b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22560b57cec5SDimitry Andric 2257fe6060f1SDimitry Andric // AC: skip __kmp_internal_fork at teams construct, let only primary 22580b57cec5SDimitry Andric // threads execute 22590b57cec5SDimitry Andric if (ap) { 22600b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, team); 
22610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 22620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 22630b57cec5SDimitry Andric root, team, master_th, gtid)); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric if (call_context == fork_context_gnu) { 22670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 22680b57cec5SDimitry Andric return TRUE; 22690b57cec5SDimitry Andric } 22700b57cec5SDimitry Andric 2271fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 22720b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 22730b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 22740b57cec5SDimitry Andric } // END of timer KMP_fork_call block 22750b57cec5SDimitry Andric 22760b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22770b57cec5SDimitry Andric // If beginning a teams construct, then change thread state 22780b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 22790b57cec5SDimitry Andric if (!ap) { 22800b57cec5SDimitry Andric KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric #endif 22830b57cec5SDimitry Andric 22840b57cec5SDimitry Andric if (!team->t.t_invoke(gtid)) { 2285fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22890b57cec5SDimitry Andric // If was beginning of a teams construct, then reset thread state 22900b57cec5SDimitry Andric if (!ap) { 22910b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 22920b57cec5SDimitry Andric } 22930b57cec5SDimitry Andric #endif 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 22960b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // Leaving the parallel region; report the thread as in runtime overhead.
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}

#if OMPT_SUPPORT
// Restore the OMPT thread state after leaving a parallel region: the thread
// is back in serial work if the (parent) team is serialized, otherwise it is
// still doing parallel work at an outer level.
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

// Fire the OMPT parallel-end callback (if registered) for the region owned by
// `parallel_data`, clear the current task's enter frame, and restore the
// thread state via __kmp_join_restore_state.
//   flags   - ompt_parallel_* invoker/league bits passed to the callback
//   codeptr - return address of the region for tool attribution
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif

// Join a parallel region previously started by __kmp_fork_call: wait for the
// team at the join barrier (unless exiting a teams construct), tear down or
// recycle the team, restore the primary thread's bookkeeping to the parent
// team, and emit the ITT/OMPT end-of-region events.
//   loc        - source location of the join
//   gtid       - global thread id of the primary thread
//   fork_context - (OMPT builds) which entry point forked the region
//   exit_teams - nonzero when leaving a teams construct (no join barrier for
//                the internal team; the league barrier is handled elsewhere)
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  if (team->t.t_serialized) {
    // Serialized region: no real team to join; delegate to the serialized
    // end path after fixing up teams-construct nesting counters.
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    // But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
#endif
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by master of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier must be told the team size before threads are
      // re-attached to the team.
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  KMP_MB();

#if OMPT_SUPPORT
  // Capture region data before the team structure may be recycled below.
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel same (hot) team
    // works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams primary thread in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      // An initial task (teams master) reports team size 0 per OMPT rules
      // encoded below; an implicit task reports the real team size.
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif
  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally since the old team may be
     reallocated and the hierarchy appears inconsistent. it is actually safe to
     run and won't cause any bugs, but will cause those assertion failures. it's
     only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ?
           ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it. */
// A record is pushed only when the thread is executing on its serial team
// at serialized nesting depth > 1, and the top of the control stack does not
// already correspond to the current serialization level. The saved ICVs are
// restored when the serialized region ends.
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      // Push only if the current top was recorded at a different (shallower)
      // serialized nesting level than the one we are in now.
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      // Snapshot the current task's ICVs into the new record.
      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      // Link the record at the head of the team's control stack.
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}

/* Changes set_nproc */
// Implements omp_set_num_threads() for the calling thread: clamps new_nth to
// [1, __kmp_max_nth], saves current internal controls if needed, and updates
// the thread's nproc ICV. May also shrink the root's hot team immediately
// (see the block that follows).
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Clamp the requested value into the supported range.
  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  // Shrink the hot team eagerly only when no parallel region is active and
  // the hot team is currently larger than the new value (and, for nested hot
  // teams, only in the non-sticky mode).
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier must be resized before threads are released.
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should unref
        // task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      // Keep the level-0 nested-hot-team bookkeeping in sync.
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}

/* Changes max_active_levels */
// Implements omp_set_max_active_levels() for the calling thread; negative
// values are ignored with a warning (continues past this window).
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed. The last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
27890b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " 27900b57cec5SDimitry Andric "max_active_levels for thread %d = (%d)\n", 27910b57cec5SDimitry Andric gtid, max_active_levels)); 27920b57cec5SDimitry Andric return; 27930b57cec5SDimitry Andric } 27940b57cec5SDimitry Andric if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { 27950b57cec5SDimitry Andric // it's OK, the max_active_levels is within the valid range: [ 0; 27960b57cec5SDimitry Andric // KMP_MAX_ACTIVE_LEVELS_LIMIT ] 27970b57cec5SDimitry Andric // We allow a zero value. (implementation defined behavior) 27980b57cec5SDimitry Andric } else { 27990b57cec5SDimitry Andric KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, 28000b57cec5SDimitry Andric KMP_MAX_ACTIVE_LEVELS_LIMIT); 28010b57cec5SDimitry Andric max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 28020b57cec5SDimitry Andric // Current upper limit is MAX_INT. (implementation defined behavior) 28030b57cec5SDimitry Andric // If the input exceeds the upper limit, we correct the input to be the 28040b57cec5SDimitry Andric // upper limit. (implementation defined behavior) 28050b57cec5SDimitry Andric // Actually, the flow should never get here until we use MAX_INT limit. 
28060b57cec5SDimitry Andric } 28070b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " 28080b57cec5SDimitry Andric "max_active_levels for thread %d = (%d)\n", 28090b57cec5SDimitry Andric gtid, max_active_levels)); 28100b57cec5SDimitry Andric 28110b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 28120b57cec5SDimitry Andric 28130b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 28140b57cec5SDimitry Andric 28150b57cec5SDimitry Andric set__max_active_levels(thread, max_active_levels); 28160b57cec5SDimitry Andric } 28170b57cec5SDimitry Andric 28180b57cec5SDimitry Andric /* Gets max_active_levels */ 28190b57cec5SDimitry Andric int __kmp_get_max_active_levels(int gtid) { 28200b57cec5SDimitry Andric kmp_info_t *thread; 28210b57cec5SDimitry Andric 28220b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); 28230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28240b57cec5SDimitry Andric 28250b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 28260b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thread->th.th_current_task); 28270b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " 28280b57cec5SDimitry Andric "curtask_maxaclevel=%d\n", 28290b57cec5SDimitry Andric gtid, thread->th.th_current_task, 28300b57cec5SDimitry Andric thread->th.th_current_task->td_icvs.max_active_levels)); 28310b57cec5SDimitry Andric return thread->th.th_current_task->td_icvs.max_active_levels; 28320b57cec5SDimitry Andric } 28330b57cec5SDimitry Andric 2834fe6060f1SDimitry Andric // nteams-var per-device ICV 2835fe6060f1SDimitry Andric void __kmp_set_num_teams(int num_teams) { 2836fe6060f1SDimitry Andric if (num_teams > 0) 2837fe6060f1SDimitry Andric __kmp_nteams = num_teams; 2838fe6060f1SDimitry Andric } 2839fe6060f1SDimitry Andric int __kmp_get_max_teams(void) { return __kmp_nteams; } 2840fe6060f1SDimitry Andric // teams-thread-limit-var per-device ICV 2841fe6060f1SDimitry 
// Set the teams-thread-limit-var per-device ICV; non-positive requests are
// ignored.
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
// Return the teams-thread-limit-var per-device ICV.
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

// The sched-kind enums are stored in int-sized ICV fields; make sure the
// representation assumption holds at compile time.
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
// Map the public (kind, chunk) pair onto the internal sched_type via
// __kmp_sch_map and store it in the calling thread's ICVs. Out-of-range
// kinds fall back to the default schedule with a warning; schedule
// modifiers (monotonic/nonmonotonic) are stripped for validation and
// re-applied at the end.
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;
  // kmp_team_t *team;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  // 2008-01-25: 0,  1 - 4,   5,         100,     101 - 102, 103
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked:  chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    //    __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
    //    kmp_sched_lower - 2 ];
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  // Re-apply the monotonic/nonmonotonic modifier bits stripped above.
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}

/* Gets def_sched_var ICV values */
// Translate the internal sched_type stored in the thread's ICVs back to the
// public (kind, chunk) pair. For unchunked static schedules *chunk is
// reported as 0; unknown internal types are fatal.
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}

// Implements omp_get_ancestor_thread_num(level): walk up the team tree,
// skipping serialized levels, and return the thread number of this thread's
// ancestor at the requested nesting level (-1 if the level is invalid).
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  // Walk toward the root: consume serialized levels first, then step to the
  // parent team, until the requested level is reached.
  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  // A remaining serialized level means the ancestor was the single thread of
  // a serialized region (thread number 0).
  return (dd > 1) ? (0) : (team->t.t_master_tid);
}

// Implements omp_get_team_size(level): walk up the team tree (same traversal
// as __kmp_get_ancestor_thread_num) and return the size of the team at the
// requested nesting level (-1 if the level is invalid).
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}

// Build an up-to-date (sched, chunk) pair from the scheduling globals.
kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine created because pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
  // independently. So one can get the updated schedule here.

  kmp_r_sched_t r_sched;

  // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
  // __kmp_guided. __kmp_sched should keep original value, so that user can set
  // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
  // different roots (even in OMP 2.5)
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}

/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
   at least argc number of *t_argv entries for the requested team. */
// Small argument counts reuse the inline cache-line storage in the team
// struct; larger ones get a page-allocated array sized to at least
// KMP_MIN_MALLOC_ARGV_ENTRIES or 2*argc.
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}

// Allocate the per-team arrays (threads, dispatch buffers, implicit task
// data) for up to max_nth threads and initialize the dispatch buffers.
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  // Single-threaded teams only need 2 dispatch buffers; otherwise use the
  // configured count.
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

// Free the per-team arrays allocated by __kmp_allocate_team_arrays, plus
// any per-thread dispatch buffers hanging off t_dispatch.
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

// Grow the per-team arrays to max_nth entries, preserving the existing
// t_threads pointers (the other arrays are freed and re-created).
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}

// Build an internal-control record from the current global defaults.
// NOTE: the initializer is positional — field order must match
// kmp_internal_control_t exactly.
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
    0, // int serial_nesting_level; //corresponds to value of th_team_serialized
    (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
    // adjustment of threads (per thread)
    (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
    // whether blocktime is explicitly set
    __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
// intervals
#endif
    __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
    // next parallel region (per thread)
    // (use a max ub on value if __kmp_parallel_initialize not called yet)
    __kmp_cg_max_nth, // int thread_limit;
    __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
    // for max_active_levels
    r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
    // {sched,chunk} pair
    __kmp_nested_proc_bind.bind_types[0],
    __kmp_default_device,
    NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

// Build an internal-control record by copying the ICVs of the given team's
// primary thread's current task (serial_nesting_level reset to 0).
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
// One-time initialization of a root structure: reset its state fields, then
// allocate and initialize its root team (serialized, size 1) and its hot
// team (the team reused for this root's top-level parallel regions, sized
// up to __kmp_dflt_team_nth_ub * 2).
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  // NOTE(review): this re-clears root_team's control stack top (already done
  // above); possibly hot_team was intended — confirm against upstream intent.
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}

#ifdef KMP_DEBUG

// Singly-linked list node used by the debug-only structure printer to
// collect the set of live teams.
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
33520b57cec5SDimitry Andric ) { 33530b57cec5SDimitry Andric 33540b57cec5SDimitry Andric // List must terminate with item where both entry and next are NULL. 33550b57cec5SDimitry Andric // Team is added to the list only once. 33560b57cec5SDimitry Andric // List is sorted in ascending order by team id. 33570b57cec5SDimitry Andric // Team id is *not* a key. 33580b57cec5SDimitry Andric 33590b57cec5SDimitry Andric kmp_team_list_t l; 33600b57cec5SDimitry Andric 33610b57cec5SDimitry Andric KMP_DEBUG_ASSERT(list != NULL); 33620b57cec5SDimitry Andric if (team == NULL) { 33630b57cec5SDimitry Andric return; 33640b57cec5SDimitry Andric } 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_parent); 33670b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_next_pool); 33680b57cec5SDimitry Andric 33690b57cec5SDimitry Andric // Search list for the team. 33700b57cec5SDimitry Andric l = list; 33710b57cec5SDimitry Andric while (l->next != NULL && l->entry != team) { 33720b57cec5SDimitry Andric l = l->next; 33730b57cec5SDimitry Andric } 33740b57cec5SDimitry Andric if (l->next != NULL) { 33750b57cec5SDimitry Andric return; // Team has been added before, exit. 33760b57cec5SDimitry Andric } 33770b57cec5SDimitry Andric 33780b57cec5SDimitry Andric // Team is not found. Search list again for insertion point. 33790b57cec5SDimitry Andric l = list; 33800b57cec5SDimitry Andric while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 33810b57cec5SDimitry Andric l = l->next; 33820b57cec5SDimitry Andric } 33830b57cec5SDimitry Andric 33840b57cec5SDimitry Andric // Insert team. 
33850b57cec5SDimitry Andric { 33860b57cec5SDimitry Andric kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 33870b57cec5SDimitry Andric sizeof(kmp_team_list_item_t)); 33880b57cec5SDimitry Andric *item = *l; 33890b57cec5SDimitry Andric l->entry = team; 33900b57cec5SDimitry Andric l->next = item; 33910b57cec5SDimitry Andric } 33920b57cec5SDimitry Andric } 33930b57cec5SDimitry Andric 33940b57cec5SDimitry Andric static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 33950b57cec5SDimitry Andric 33960b57cec5SDimitry Andric ) { 33970b57cec5SDimitry Andric __kmp_printf("%s", title); 33980b57cec5SDimitry Andric if (team != NULL) { 33990b57cec5SDimitry Andric __kmp_printf("%2x %p\n", team->t.t_id, team); 34000b57cec5SDimitry Andric } else { 34010b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 34020b57cec5SDimitry Andric } 34030b57cec5SDimitry Andric } 34040b57cec5SDimitry Andric 34050b57cec5SDimitry Andric static void __kmp_print_structure_thread(char const *title, 34060b57cec5SDimitry Andric kmp_info_p const *thread) { 34070b57cec5SDimitry Andric __kmp_printf("%s", title); 34080b57cec5SDimitry Andric if (thread != NULL) { 34090b57cec5SDimitry Andric __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 34100b57cec5SDimitry Andric } else { 34110b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 34120b57cec5SDimitry Andric } 34130b57cec5SDimitry Andric } 34140b57cec5SDimitry Andric 34150b57cec5SDimitry Andric void __kmp_print_structure(void) { 34160b57cec5SDimitry Andric 34170b57cec5SDimitry Andric kmp_team_list_t list; 34180b57cec5SDimitry Andric 34190b57cec5SDimitry Andric // Initialize list of teams. 
34200b57cec5SDimitry Andric list = 34210b57cec5SDimitry Andric (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 34220b57cec5SDimitry Andric list->entry = NULL; 34230b57cec5SDimitry Andric list->next = NULL; 34240b57cec5SDimitry Andric 34250b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nGlobal Thread " 34260b57cec5SDimitry Andric "Table\n------------------------------\n"); 34270b57cec5SDimitry Andric { 34280b57cec5SDimitry Andric int gtid; 34290b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34300b57cec5SDimitry Andric __kmp_printf("%2d", gtid); 34310b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34320b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_threads[gtid]); 34330b57cec5SDimitry Andric } 34340b57cec5SDimitry Andric if (__kmp_root != NULL) { 34350b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_root[gtid]); 34360b57cec5SDimitry Andric } 34370b57cec5SDimitry Andric __kmp_printf("\n"); 34380b57cec5SDimitry Andric } 34390b57cec5SDimitry Andric } 34400b57cec5SDimitry Andric 34410b57cec5SDimitry Andric // Print out __kmp_threads array. 
34420b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nThreads\n--------------------" 34430b57cec5SDimitry Andric "----------\n"); 34440b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34450b57cec5SDimitry Andric int gtid; 34460b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34470b57cec5SDimitry Andric kmp_info_t const *thread = __kmp_threads[gtid]; 34480b57cec5SDimitry Andric if (thread != NULL) { 34490b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, thread); 34500b57cec5SDimitry Andric __kmp_printf(" Our Root: %p\n", thread->th.th_root); 34510b57cec5SDimitry Andric __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 34520b57cec5SDimitry Andric __kmp_print_structure_team(" Serial Team: ", 34530b57cec5SDimitry Andric thread->th.th_serial_team); 34540b57cec5SDimitry Andric __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 3455fe6060f1SDimitry Andric __kmp_print_structure_thread(" Primary: ", 34560b57cec5SDimitry Andric thread->th.th_team_master); 34570b57cec5SDimitry Andric __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 34580b57cec5SDimitry Andric __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 34590b57cec5SDimitry Andric __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 34600b57cec5SDimitry Andric __kmp_print_structure_thread(" Next in pool: ", 34610b57cec5SDimitry Andric thread->th.th_next_pool); 34620b57cec5SDimitry Andric __kmp_printf("\n"); 34630b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_team); 34640b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 34650b57cec5SDimitry Andric } 34660b57cec5SDimitry Andric } 34670b57cec5SDimitry Andric } else { 34680b57cec5SDimitry Andric __kmp_printf("Threads array is not allocated.\n"); 34690b57cec5SDimitry Andric } 34700b57cec5SDimitry Andric 34710b57cec5SDimitry Andric // Print out __kmp_root array. 
34720b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nUbers\n----------------------" 34730b57cec5SDimitry Andric "--------\n"); 34740b57cec5SDimitry Andric if (__kmp_root != NULL) { 34750b57cec5SDimitry Andric int gtid; 34760b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34770b57cec5SDimitry Andric kmp_root_t const *root = __kmp_root[gtid]; 34780b57cec5SDimitry Andric if (root != NULL) { 34790b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, root); 34800b57cec5SDimitry Andric __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 34810b57cec5SDimitry Andric __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 34820b57cec5SDimitry Andric __kmp_print_structure_thread(" Uber Thread: ", 34830b57cec5SDimitry Andric root->r.r_uber_thread); 34840b57cec5SDimitry Andric __kmp_printf(" Active?: %2d\n", root->r.r_active); 34850b57cec5SDimitry Andric __kmp_printf(" In Parallel: %2d\n", 34860b57cec5SDimitry Andric KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); 34870b57cec5SDimitry Andric __kmp_printf("\n"); 34880b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_root_team); 34890b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_hot_team); 34900b57cec5SDimitry Andric } 34910b57cec5SDimitry Andric } 34920b57cec5SDimitry Andric } else { 34930b57cec5SDimitry Andric __kmp_printf("Ubers array is not allocated.\n"); 34940b57cec5SDimitry Andric } 34950b57cec5SDimitry Andric 34960b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nTeams\n----------------------" 34970b57cec5SDimitry Andric "--------\n"); 34980b57cec5SDimitry Andric while (list->next != NULL) { 34990b57cec5SDimitry Andric kmp_team_p const *team = list->entry; 35000b57cec5SDimitry Andric int i; 35010b57cec5SDimitry Andric __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 35020b57cec5SDimitry Andric __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 
3503fe6060f1SDimitry Andric __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid); 35040b57cec5SDimitry Andric __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 35050b57cec5SDimitry Andric __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 35060b57cec5SDimitry Andric __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 35070b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; ++i) { 35080b57cec5SDimitry Andric __kmp_printf(" Thread %2d: ", i); 35090b57cec5SDimitry Andric __kmp_print_structure_thread("", team->t.t_threads[i]); 35100b57cec5SDimitry Andric } 35110b57cec5SDimitry Andric __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 35120b57cec5SDimitry Andric __kmp_printf("\n"); 35130b57cec5SDimitry Andric list = list->next; 35140b57cec5SDimitry Andric } 35150b57cec5SDimitry Andric 35160b57cec5SDimitry Andric // Print out __kmp_thread_pool and __kmp_team_pool. 35170b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nPools\n----------------------" 35180b57cec5SDimitry Andric "--------\n"); 35190b57cec5SDimitry Andric __kmp_print_structure_thread("Thread pool: ", 35200b57cec5SDimitry Andric CCAST(kmp_info_t *, __kmp_thread_pool)); 35210b57cec5SDimitry Andric __kmp_print_structure_team("Team pool: ", 35220b57cec5SDimitry Andric CCAST(kmp_team_t *, __kmp_team_pool)); 35230b57cec5SDimitry Andric __kmp_printf("\n"); 35240b57cec5SDimitry Andric 35250b57cec5SDimitry Andric // Free team list. 
35260b57cec5SDimitry Andric while (list != NULL) { 35270b57cec5SDimitry Andric kmp_team_list_item_t *item = list; 35280b57cec5SDimitry Andric list = list->next; 35290b57cec5SDimitry Andric KMP_INTERNAL_FREE(item); 35300b57cec5SDimitry Andric } 35310b57cec5SDimitry Andric } 35320b57cec5SDimitry Andric 35330b57cec5SDimitry Andric #endif 35340b57cec5SDimitry Andric 35350b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35360b57cec5SDimitry Andric // Stuff for per-thread fast random number generator 35370b57cec5SDimitry Andric // Table of primes 35380b57cec5SDimitry Andric static const unsigned __kmp_primes[] = { 35390b57cec5SDimitry Andric 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 35400b57cec5SDimitry Andric 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 35410b57cec5SDimitry Andric 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 35420b57cec5SDimitry Andric 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 35430b57cec5SDimitry Andric 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 35440b57cec5SDimitry Andric 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 35450b57cec5SDimitry Andric 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 35460b57cec5SDimitry Andric 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 35470b57cec5SDimitry Andric 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 35480b57cec5SDimitry Andric 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 35490b57cec5SDimitry Andric 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 35500b57cec5SDimitry Andric 35510b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35520b57cec5SDimitry Andric // __kmp_get_random: Get a random number using a linear congruential method. 
// Returns the high 16 bits of the thread's current LCG state, then advances
// the state: x = x * a + 1 (mod 2^32 via unsigned wraparound). The multiplier
// th_a is per-thread, chosen in __kmp_init_random.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
// Seeds the per-thread LCG from the thread's tid: the multiplier is selected
// from the __kmp_primes table (tid mod table size) so that different threads
// get decorrelated sequences; the initial state mixes seed and multiplier.
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}

#if KMP_OS_WINDOWS
/* reclaim array entries for root threads that are already dead, returns number
 * reclaimed */
// Windows static-library only: a foreign root thread may exit without
// unregistering, so scan for uber gtids whose OS thread is gone and free
// their slots. Only non-active roots are reclaimed (see note below).
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and
   __kmp_root, with appropriate update to __kmp_threads_capacity. Array
   capacity is increased by doubling with clipping to __kmp_tp_capacity, if
   threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nNeed free slots, the function does
   that expansion. If not, nothing is done beyond the possible initial root
   thread reclamation.

   If any argument is negative, the behavior is undefined.
*/
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
  // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
  // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
  // > __kmp_max_nth in one of two ways:
  //
  // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
  //    may not be reused by another thread, so we may need to increase
  //    __kmp_threads_capacity to __kmp_max_nth + 1.
  //
  // 2) New foreign root(s) are encountered. We always register new foreign
  //    roots. This may cause a smaller # of threads to be allocated at
  //    subsequent parallel regions, but the worker threads hang around (and
  //    eventually go to sleep) and need slots in the __kmp_threads[] array.
  //
  // Anyway, that is the reason for moving the check to see if
  // __kmp_max_nth was exceeded into __kmp_reserve_threads()
  // instead of having it performed here. -BB

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  // Double the capacity until it covers the requirement, clipping at
  // __kmp_sys_max_nth (the headroom check above guarantees termination).
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  // __kmp_threads and __kmp_root live in one allocation: threads first, then
  // roots, so a single __kmp_allocate covers both arrays.
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put old __kmp_threads array on a list. Any ongoing references to the old
  // list will be valid. This list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  // Publish the new arrays; volatile casts force real stores so concurrent
  // readers observe the pointer/capacity updates.
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}

/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if are the
   thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* 2007-03-02:
     If initial thread did not invoke OpenMP RTL yet, and this thread is not an
     initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
     work as expected -- it may return false (that means there is at least one
     empty slot in __kmp_threads array), but it is possible the only free slot
     is #0, which is reserved for initial thread and so cannot be used for this
     one. Following code workarounds this bug.
37220b57cec5SDimitry Andric 37230b57cec5SDimitry Andric However, right solution seems to be not reserving slot #0 for initial 37240b57cec5SDimitry Andric thread because: 37250b57cec5SDimitry Andric (1) there is no magic in slot #0, 37260b57cec5SDimitry Andric (2) we cannot detect initial thread reliably (the first thread which does 37270b57cec5SDimitry Andric serial initialization may be not a real initial thread). 37280b57cec5SDimitry Andric */ 37290b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 37300b57cec5SDimitry Andric if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 37310b57cec5SDimitry Andric --capacity; 37320b57cec5SDimitry Andric } 37330b57cec5SDimitry Andric 3734d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 3735d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 3736d409305fSDimitry Andric // in __kmp_threads_capacity. 3737d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 3738d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 3739d409305fSDimitry Andric } 3740d409305fSDimitry Andric 37410b57cec5SDimitry Andric /* see if there are too many threads */ 37420b57cec5SDimitry Andric if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { 37430b57cec5SDimitry Andric if (__kmp_tp_cached) { 37440b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), 37450b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 37460b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 37470b57cec5SDimitry Andric } else { 37480b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), 37490b57cec5SDimitry Andric __kmp_msg_null); 37500b57cec5SDimitry Andric } 37510b57cec5SDimitry Andric } 37520b57cec5SDimitry Andric 3753e8d8bef9SDimitry Andric // When hidden helper task is enabled, __kmp_threads 
is organized as follows: 3754e8d8bef9SDimitry Andric // 0: initial thread, also a regular OpenMP thread. 3755e8d8bef9SDimitry Andric // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads. 3756e8d8bef9SDimitry Andric // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for 3757e8d8bef9SDimitry Andric // regular OpenMP threads. 3758e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper_threads)) { 3759e8d8bef9SDimitry Andric // Find an available thread slot for hidden helper thread. Slots for hidden 3760e8d8bef9SDimitry Andric // helper threads start from 1 to __kmp_hidden_helper_threads_num. 3761e8d8bef9SDimitry Andric for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL && 3762e8d8bef9SDimitry Andric gtid <= __kmp_hidden_helper_threads_num; 37630b57cec5SDimitry Andric gtid++) 37640b57cec5SDimitry Andric ; 3765e8d8bef9SDimitry Andric KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num); 3766e8d8bef9SDimitry Andric KA_TRACE(1, ("__kmp_register_root: found slot in threads array for " 3767e8d8bef9SDimitry Andric "hidden helper thread: T#%d\n", 3768e8d8bef9SDimitry Andric gtid)); 3769e8d8bef9SDimitry Andric } else { 3770e8d8bef9SDimitry Andric /* find an available thread slot */ 3771e8d8bef9SDimitry Andric // Don't reassign the zero slot since we need that to only be used by 3772e8d8bef9SDimitry Andric // initial thread. Slots for hidden helper threads should also be skipped. 
3773d409305fSDimitry Andric if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 3774e8d8bef9SDimitry Andric gtid = 0; 3775e8d8bef9SDimitry Andric } else { 3776e8d8bef9SDimitry Andric for (gtid = __kmp_hidden_helper_threads_num + 1; 3777e8d8bef9SDimitry Andric TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++) 3778e8d8bef9SDimitry Andric ; 3779e8d8bef9SDimitry Andric } 3780e8d8bef9SDimitry Andric KA_TRACE( 3781e8d8bef9SDimitry Andric 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); 37820b57cec5SDimitry Andric KMP_ASSERT(gtid < __kmp_threads_capacity); 3783e8d8bef9SDimitry Andric } 37840b57cec5SDimitry Andric 37850b57cec5SDimitry Andric /* update global accounting */ 37860b57cec5SDimitry Andric __kmp_all_nth++; 37870b57cec5SDimitry Andric TCW_4(__kmp_nth, __kmp_nth + 1); 37880b57cec5SDimitry Andric 37890b57cec5SDimitry Andric // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 37900b57cec5SDimitry Andric // numbers of procs, and method #2 (keyed API call) for higher numbers. 
37910b57cec5SDimitry Andric if (__kmp_adjust_gtid_mode) { 37920b57cec5SDimitry Andric if (__kmp_all_nth >= __kmp_tls_gtid_min) { 37930b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 2) { 37940b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 2); 37950b57cec5SDimitry Andric } 37960b57cec5SDimitry Andric } else { 37970b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 1) { 37980b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 1); 37990b57cec5SDimitry Andric } 38000b57cec5SDimitry Andric } 38010b57cec5SDimitry Andric } 38020b57cec5SDimitry Andric 38030b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 38040b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary */ 38050b57cec5SDimitry Andric /* Middle initialization might not have occurred yet */ 38060b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 38070b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 38080b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 38090b57cec5SDimitry Andric } 38100b57cec5SDimitry Andric } 38110b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 38120b57cec5SDimitry Andric 38130b57cec5SDimitry Andric /* setup this new hierarchy */ 38140b57cec5SDimitry Andric if (!(root = __kmp_root[gtid])) { 38150b57cec5SDimitry Andric root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); 38160b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!root->r.r_root_team); 38170b57cec5SDimitry Andric } 38180b57cec5SDimitry Andric 38190b57cec5SDimitry Andric #if KMP_STATS_ENABLED 38200b57cec5SDimitry Andric // Initialize stats as soon as possible (right after gtid assignment). 
38210b57cec5SDimitry Andric __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 38220b57cec5SDimitry Andric __kmp_stats_thread_ptr->startLife(); 38230b57cec5SDimitry Andric KMP_SET_THREAD_STATE(SERIAL_REGION); 38240b57cec5SDimitry Andric KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 38250b57cec5SDimitry Andric #endif 38260b57cec5SDimitry Andric __kmp_initialize_root(root); 38270b57cec5SDimitry Andric 38280b57cec5SDimitry Andric /* setup new root thread structure */ 38290b57cec5SDimitry Andric if (root->r.r_uber_thread) { 38300b57cec5SDimitry Andric root_thread = root->r.r_uber_thread; 38310b57cec5SDimitry Andric } else { 38320b57cec5SDimitry Andric root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 38330b57cec5SDimitry Andric if (__kmp_storage_map) { 38340b57cec5SDimitry Andric __kmp_print_thread_storage_map(root_thread, gtid); 38350b57cec5SDimitry Andric } 38360b57cec5SDimitry Andric root_thread->th.th_info.ds.ds_gtid = gtid; 38370b57cec5SDimitry Andric #if OMPT_SUPPORT 38380b57cec5SDimitry Andric root_thread->th.ompt_thread_info.thread_data = ompt_data_none; 38390b57cec5SDimitry Andric #endif 38400b57cec5SDimitry Andric root_thread->th.th_root = root; 38410b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 38420b57cec5SDimitry Andric root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 38430b57cec5SDimitry Andric } 38440b57cec5SDimitry Andric #if USE_FAST_MEMORY 38450b57cec5SDimitry Andric __kmp_initialize_fast_memory(root_thread); 38460b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 38470b57cec5SDimitry Andric 38480b57cec5SDimitry Andric #if KMP_USE_BGET 38490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 38500b57cec5SDimitry Andric __kmp_initialize_bget(root_thread); 38510b57cec5SDimitry Andric #endif 38520b57cec5SDimitry Andric __kmp_init_random(root_thread); // Initialize random number generator 38530b57cec5SDimitry Andric } 38540b57cec5SDimitry Andric 38550b57cec5SDimitry Andric /* setup 
the serial team held in reserve by the root thread */ 38560b57cec5SDimitry Andric if (!root_thread->th.th_serial_team) { 38570b57cec5SDimitry Andric kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 38580b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 38590b57cec5SDimitry Andric root_thread->th.th_serial_team = __kmp_allocate_team( 38600b57cec5SDimitry Andric root, 1, 1, 38610b57cec5SDimitry Andric #if OMPT_SUPPORT 38620b57cec5SDimitry Andric ompt_data_none, // root parallel id 38630b57cec5SDimitry Andric #endif 38640b57cec5SDimitry Andric proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 38650b57cec5SDimitry Andric } 38660b57cec5SDimitry Andric KMP_ASSERT(root_thread->th.th_serial_team); 38670b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 38680b57cec5SDimitry Andric root_thread->th.th_serial_team)); 38690b57cec5SDimitry Andric 38700b57cec5SDimitry Andric /* drop root_thread into place */ 38710b57cec5SDimitry Andric TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 38720b57cec5SDimitry Andric 38730b57cec5SDimitry Andric root->r.r_root_team->t.t_threads[0] = root_thread; 38740b57cec5SDimitry Andric root->r.r_hot_team->t.t_threads[0] = root_thread; 38750b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 38760b57cec5SDimitry Andric // AC: the team created in reserve, not for execution (it is unused for now). 
38770b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_serialized = 0; 38780b57cec5SDimitry Andric root->r.r_uber_thread = root_thread; 38790b57cec5SDimitry Andric 38800b57cec5SDimitry Andric /* initialize the thread, get it ready to go */ 38810b57cec5SDimitry Andric __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 38820b57cec5SDimitry Andric TCW_4(__kmp_init_gtid, TRUE); 38830b57cec5SDimitry Andric 3884fe6060f1SDimitry Andric /* prepare the primary thread for get_gtid() */ 38850b57cec5SDimitry Andric __kmp_gtid_set_specific(gtid); 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric #if USE_ITT_BUILD 38880b57cec5SDimitry Andric __kmp_itt_thread_name(gtid); 38890b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 38900b57cec5SDimitry Andric 38910b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 38920b57cec5SDimitry Andric __kmp_gtid = gtid; 38930b57cec5SDimitry Andric #endif 38940b57cec5SDimitry Andric __kmp_create_worker(gtid, root_thread, __kmp_stksize); 38950b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 38960b57cec5SDimitry Andric 38970b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 38980b57cec5SDimitry Andric "plain=%u\n", 38990b57cec5SDimitry Andric gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 39000b57cec5SDimitry Andric root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 39010b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE)); 39020b57cec5SDimitry Andric { // Initialize barrier data. 
39030b57cec5SDimitry Andric int b; 39040b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 39050b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 39060b57cec5SDimitry Andric #if USE_DEBUGGER 39070b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 39080b57cec5SDimitry Andric #endif 39090b57cec5SDimitry Andric } 39100b57cec5SDimitry Andric } 39110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 39120b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE); 39130b57cec5SDimitry Andric 39140b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 39150b57cec5SDimitry Andric root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 39160b57cec5SDimitry Andric root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 39170b57cec5SDimitry Andric root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 39180b57cec5SDimitry Andric root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 39190b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 39200b57cec5SDimitry Andric root_thread->th.th_def_allocator = __kmp_def_allocator; 39210b57cec5SDimitry Andric root_thread->th.th_prev_level = 0; 39220b57cec5SDimitry Andric root_thread->th.th_prev_num_threads = 1; 39230b57cec5SDimitry Andric 39240b57cec5SDimitry Andric kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t)); 39250b57cec5SDimitry Andric tmp->cg_root = root_thread; 39260b57cec5SDimitry Andric tmp->cg_thread_limit = __kmp_cg_max_nth; 39270b57cec5SDimitry Andric tmp->cg_nthreads = 1; 39280b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with" 39290b57cec5SDimitry Andric " cg_nthreads init to 1\n", 39300b57cec5SDimitry Andric root_thread, tmp)); 39310b57cec5SDimitry Andric tmp->up = NULL; 39320b57cec5SDimitry Andric root_thread->th.th_cg_roots = tmp; 39330b57cec5SDimitry Andric 39340b57cec5SDimitry Andric __kmp_root_counter++; 39350b57cec5SDimitry Andric 
39360b57cec5SDimitry Andric #if OMPT_SUPPORT 39370b57cec5SDimitry Andric if (!initial_thread && ompt_enabled.enabled) { 39380b57cec5SDimitry Andric 39390b57cec5SDimitry Andric kmp_info_t *root_thread = ompt_get_thread(); 39400b57cec5SDimitry Andric 39410b57cec5SDimitry Andric ompt_set_thread_state(root_thread, ompt_state_overhead); 39420b57cec5SDimitry Andric 39430b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_thread_begin) { 39440b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( 39450b57cec5SDimitry Andric ompt_thread_initial, __ompt_get_thread_data_internal()); 39460b57cec5SDimitry Andric } 39470b57cec5SDimitry Andric ompt_data_t *task_data; 39480b57cec5SDimitry Andric ompt_data_t *parallel_data; 3949fe6060f1SDimitry Andric __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, 3950fe6060f1SDimitry Andric NULL); 39510b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 39520b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 39530b57cec5SDimitry Andric ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial); 39540b57cec5SDimitry Andric } 39550b57cec5SDimitry Andric 39560b57cec5SDimitry Andric ompt_set_thread_state(root_thread, ompt_state_work_serial); 39570b57cec5SDimitry Andric } 39580b57cec5SDimitry Andric #endif 3959fe6060f1SDimitry Andric #if OMPD_SUPPORT 3960fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 3961fe6060f1SDimitry Andric ompd_bp_thread_begin(); 3962fe6060f1SDimitry Andric #endif 39630b57cec5SDimitry Andric 39640b57cec5SDimitry Andric KMP_MB(); 39650b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 39660b57cec5SDimitry Andric 39670b57cec5SDimitry Andric return gtid; 39680b57cec5SDimitry Andric } 39690b57cec5SDimitry Andric 39700b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 39710b57cec5SDimitry Andric static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, 39720b57cec5SDimitry 
Andric const int max_level) { 39730b57cec5SDimitry Andric int i, n, nth; 39740b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 39750b57cec5SDimitry Andric if (!hot_teams || !hot_teams[level].hot_team) { 39760b57cec5SDimitry Andric return 0; 39770b57cec5SDimitry Andric } 39780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(level < max_level); 39790b57cec5SDimitry Andric kmp_team_t *team = hot_teams[level].hot_team; 39800b57cec5SDimitry Andric nth = hot_teams[level].hot_team_nth; 3981fe6060f1SDimitry Andric n = nth - 1; // primary thread is not freed 39820b57cec5SDimitry Andric if (level < max_level - 1) { 39830b57cec5SDimitry Andric for (i = 0; i < nth; ++i) { 39840b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[i]; 39850b57cec5SDimitry Andric n += __kmp_free_hot_teams(root, th, level + 1, max_level); 39860b57cec5SDimitry Andric if (i > 0 && th->th.th_hot_teams) { 39870b57cec5SDimitry Andric __kmp_free(th->th.th_hot_teams); 39880b57cec5SDimitry Andric th->th.th_hot_teams = NULL; 39890b57cec5SDimitry Andric } 39900b57cec5SDimitry Andric } 39910b57cec5SDimitry Andric } 39920b57cec5SDimitry Andric __kmp_free_team(root, team, NULL); 39930b57cec5SDimitry Andric return n; 39940b57cec5SDimitry Andric } 39950b57cec5SDimitry Andric #endif 39960b57cec5SDimitry Andric 39970b57cec5SDimitry Andric // Resets a root thread and clear its root and hot teams. 39980b57cec5SDimitry Andric // Returns the number of __kmp_threads entries directly and indirectly freed. 
39990b57cec5SDimitry Andric static int __kmp_reset_root(int gtid, kmp_root_t *root) { 40000b57cec5SDimitry Andric kmp_team_t *root_team = root->r.r_root_team; 40010b57cec5SDimitry Andric kmp_team_t *hot_team = root->r.r_hot_team; 40020b57cec5SDimitry Andric int n = hot_team->t.t_nproc; 40030b57cec5SDimitry Andric int i; 40040b57cec5SDimitry Andric 40050b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!root->r.r_active); 40060b57cec5SDimitry Andric 40070b57cec5SDimitry Andric root->r.r_root_team = NULL; 40080b57cec5SDimitry Andric root->r.r_hot_team = NULL; 40090b57cec5SDimitry Andric // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team 40100b57cec5SDimitry Andric // before call to __kmp_free_team(). 40110b57cec5SDimitry Andric __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); 40120b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 40130b57cec5SDimitry Andric if (__kmp_hot_teams_max_level > 40140b57cec5SDimitry Andric 0) { // need to free nested hot teams and their threads if any 40150b57cec5SDimitry Andric for (i = 0; i < hot_team->t.t_nproc; ++i) { 40160b57cec5SDimitry Andric kmp_info_t *th = hot_team->t.t_threads[i]; 40170b57cec5SDimitry Andric if (__kmp_hot_teams_max_level > 1) { 40180b57cec5SDimitry Andric n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level); 40190b57cec5SDimitry Andric } 40200b57cec5SDimitry Andric if (th->th.th_hot_teams) { 40210b57cec5SDimitry Andric __kmp_free(th->th.th_hot_teams); 40220b57cec5SDimitry Andric th->th.th_hot_teams = NULL; 40230b57cec5SDimitry Andric } 40240b57cec5SDimitry Andric } 40250b57cec5SDimitry Andric } 40260b57cec5SDimitry Andric #endif 40270b57cec5SDimitry Andric __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); 40280b57cec5SDimitry Andric 40290b57cec5SDimitry Andric // Before we can reap the thread, we need to make certain that all other 40300b57cec5SDimitry Andric // threads in the teams that had this root as ancestor have stopped trying to 40310b57cec5SDimitry Andric // 
steal tasks. 40320b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 40330b57cec5SDimitry Andric __kmp_wait_to_unref_task_teams(); 40340b57cec5SDimitry Andric } 40350b57cec5SDimitry Andric 40360b57cec5SDimitry Andric #if KMP_OS_WINDOWS 40370b57cec5SDimitry Andric /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ 40380b57cec5SDimitry Andric KA_TRACE( 40390b57cec5SDimitry Andric 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC 40400b57cec5SDimitry Andric "\n", 40410b57cec5SDimitry Andric (LPVOID) & (root->r.r_uber_thread->th), 40420b57cec5SDimitry Andric root->r.r_uber_thread->th.th_info.ds.ds_thread)); 40430b57cec5SDimitry Andric __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread); 40440b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 40450b57cec5SDimitry Andric 4046fe6060f1SDimitry Andric #if OMPD_SUPPORT 4047fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 4048fe6060f1SDimitry Andric ompd_bp_thread_end(); 4049fe6060f1SDimitry Andric #endif 4050fe6060f1SDimitry Andric 40510b57cec5SDimitry Andric #if OMPT_SUPPORT 40520b57cec5SDimitry Andric ompt_data_t *task_data; 40530b57cec5SDimitry Andric ompt_data_t *parallel_data; 4054fe6060f1SDimitry Andric __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, 4055fe6060f1SDimitry Andric NULL); 40560b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 40570b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 40580b57cec5SDimitry Andric ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial); 40590b57cec5SDimitry Andric } 40600b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_thread_end) { 40610b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_thread_end)( 40620b57cec5SDimitry Andric &(root->r.r_uber_thread->th.ompt_thread_info.thread_data)); 40630b57cec5SDimitry Andric } 40640b57cec5SDimitry Andric #endif 40650b57cec5SDimitry Andric 
40660b57cec5SDimitry Andric TCW_4(__kmp_nth, 40670b57cec5SDimitry Andric __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. 40680b57cec5SDimitry Andric i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--; 40690b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p" 40700b57cec5SDimitry Andric " to %d\n", 40710b57cec5SDimitry Andric root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots, 40720b57cec5SDimitry Andric root->r.r_uber_thread->th.th_cg_roots->cg_nthreads)); 40730b57cec5SDimitry Andric if (i == 1) { 40740b57cec5SDimitry Andric // need to free contention group structure 40750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_uber_thread == 40760b57cec5SDimitry Andric root->r.r_uber_thread->th.th_cg_roots->cg_root); 40770b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL); 40780b57cec5SDimitry Andric __kmp_free(root->r.r_uber_thread->th.th_cg_roots); 40790b57cec5SDimitry Andric root->r.r_uber_thread->th.th_cg_roots = NULL; 40800b57cec5SDimitry Andric } 40810b57cec5SDimitry Andric __kmp_reap_thread(root->r.r_uber_thread, 1); 40820b57cec5SDimitry Andric 4083480093f4SDimitry Andric // We canot put root thread to __kmp_thread_pool, so we have to reap it 4084480093f4SDimitry Andric // instead of freeing. 
40850b57cec5SDimitry Andric root->r.r_uber_thread = NULL; 40860b57cec5SDimitry Andric /* mark root as no longer in use */ 40870b57cec5SDimitry Andric root->r.r_begin = FALSE; 40880b57cec5SDimitry Andric 40890b57cec5SDimitry Andric return n; 40900b57cec5SDimitry Andric } 40910b57cec5SDimitry Andric 40920b57cec5SDimitry Andric void __kmp_unregister_root_current_thread(int gtid) { 40930b57cec5SDimitry Andric KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid)); 40940b57cec5SDimitry Andric /* this lock should be ok, since unregister_root_current_thread is never 40950b57cec5SDimitry Andric called during an abort, only during a normal close. furthermore, if you 40960b57cec5SDimitry Andric have the forkjoin lock, you should never try to get the initz lock */ 40970b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 40980b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 40990b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, " 41000b57cec5SDimitry Andric "exiting T#%d\n", 41010b57cec5SDimitry Andric gtid)); 41020b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 41030b57cec5SDimitry Andric return; 41040b57cec5SDimitry Andric } 41050b57cec5SDimitry Andric kmp_root_t *root = __kmp_root[gtid]; 41060b57cec5SDimitry Andric 41070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); 41080b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 41090b57cec5SDimitry Andric KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); 41100b57cec5SDimitry Andric KMP_ASSERT(root->r.r_active == FALSE); 41110b57cec5SDimitry Andric 41120b57cec5SDimitry Andric KMP_MB(); 41130b57cec5SDimitry Andric 41140b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 41150b57cec5SDimitry Andric kmp_team_t *team = thread->th.th_team; 41160b57cec5SDimitry Andric kmp_task_team_t *task_team = thread->th.th_task_team; 41170b57cec5SDimitry 
Andric 41180b57cec5SDimitry Andric // we need to wait for the proxy tasks before finishing the thread 411904eeddc0SDimitry Andric if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || 412004eeddc0SDimitry Andric task_team->tt.tt_hidden_helper_task_encountered)) { 41210b57cec5SDimitry Andric #if OMPT_SUPPORT 41220b57cec5SDimitry Andric // the runtime is shutting down so we won't report any events 41230b57cec5SDimitry Andric thread->th.ompt_thread_info.state = ompt_state_undefined; 41240b57cec5SDimitry Andric #endif 41250b57cec5SDimitry Andric __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); 41260b57cec5SDimitry Andric } 41270b57cec5SDimitry Andric 41280b57cec5SDimitry Andric __kmp_reset_root(gtid, root); 41290b57cec5SDimitry Andric 41300b57cec5SDimitry Andric KMP_MB(); 41310b57cec5SDimitry Andric KC_TRACE(10, 41320b57cec5SDimitry Andric ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid)); 41330b57cec5SDimitry Andric 41340b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 41350b57cec5SDimitry Andric } 41360b57cec5SDimitry Andric 41370b57cec5SDimitry Andric #if KMP_OS_WINDOWS 41380b57cec5SDimitry Andric /* __kmp_forkjoin_lock must be already held 41390b57cec5SDimitry Andric Unregisters a root thread that is not the current thread. Returns the number 41400b57cec5SDimitry Andric of __kmp_threads entries freed as a result. 
*/ 41410b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid) { 41420b57cec5SDimitry Andric kmp_root_t *root = __kmp_root[gtid]; 41430b57cec5SDimitry Andric int r; 41440b57cec5SDimitry Andric 41450b57cec5SDimitry Andric KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid)); 41460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); 41470b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 41480b57cec5SDimitry Andric KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); 41490b57cec5SDimitry Andric KMP_ASSERT(root->r.r_active == FALSE); 41500b57cec5SDimitry Andric 41510b57cec5SDimitry Andric r = __kmp_reset_root(gtid, root); 41520b57cec5SDimitry Andric KC_TRACE(10, 41530b57cec5SDimitry Andric ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid)); 41540b57cec5SDimitry Andric return r; 41550b57cec5SDimitry Andric } 41560b57cec5SDimitry Andric #endif 41570b57cec5SDimitry Andric 41580b57cec5SDimitry Andric #if KMP_DEBUG 41590b57cec5SDimitry Andric void __kmp_task_info() { 41600b57cec5SDimitry Andric 41610b57cec5SDimitry Andric kmp_int32 gtid = __kmp_entry_gtid(); 41620b57cec5SDimitry Andric kmp_int32 tid = __kmp_tid_from_gtid(gtid); 41630b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 41640b57cec5SDimitry Andric kmp_team_t *steam = this_thr->th.th_serial_team; 41650b57cec5SDimitry Andric kmp_team_t *team = this_thr->th.th_team; 41660b57cec5SDimitry Andric 41670b57cec5SDimitry Andric __kmp_printf( 41680b57cec5SDimitry Andric "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p " 41690b57cec5SDimitry Andric "ptask=%p\n", 41700b57cec5SDimitry Andric gtid, tid, this_thr, team, steam, this_thr->th.th_current_task, 41710b57cec5SDimitry Andric team->t.t_implicit_task_taskdata[tid].td_parent); 41720b57cec5SDimitry Andric } 41730b57cec5SDimitry Andric #endif // KMP_DEBUG 41740b57cec5SDimitry Andric 41750b57cec5SDimitry Andric /* TODO optimize with one big 
memclr, take out what isn't needed, split 41760b57cec5SDimitry Andric responsibility to workers as much as possible, and delay initialization of 41770b57cec5SDimitry Andric features as much as possible */ 41780b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, 41790b57cec5SDimitry Andric int tid, int gtid) { 41800b57cec5SDimitry Andric /* this_thr->th.th_info.ds.ds_gtid is setup in 41810b57cec5SDimitry Andric kmp_allocate_thread/create_worker. 41820b57cec5SDimitry Andric this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 41830b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr != NULL); 41840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); 41850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 41860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 41870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 4188fe6060f1SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 41890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master); 41900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master->th.th_root); 41910b57cec5SDimitry Andric 41920b57cec5SDimitry Andric KMP_MB(); 41930b57cec5SDimitry Andric 41940b57cec5SDimitry Andric TCW_SYNC_PTR(this_thr->th.th_team, team); 41950b57cec5SDimitry Andric 41960b57cec5SDimitry Andric this_thr->th.th_info.ds.ds_tid = tid; 41970b57cec5SDimitry Andric this_thr->th.th_set_nproc = 0; 41980b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) 41990b57cec5SDimitry Andric // When tasking is possible, threads are not safe to reap until they are 42000b57cec5SDimitry Andric // done tasking; this will be set when tasking code is exited in wait 42010b57cec5SDimitry Andric this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 42020b57cec5SDimitry Andric else // no tasking --> always safe to reap 42030b57cec5SDimitry Andric this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 42040b57cec5SDimitry Andric this_thr->th.th_set_proc_bind = proc_bind_default; 
42050b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 42060b57cec5SDimitry Andric this_thr->th.th_new_place = this_thr->th.th_current_place; 42070b57cec5SDimitry Andric #endif 42080b57cec5SDimitry Andric this_thr->th.th_root = master->th.th_root; 42090b57cec5SDimitry Andric 42100b57cec5SDimitry Andric /* setup the thread's cache of the team structure */ 42110b57cec5SDimitry Andric this_thr->th.th_team_nproc = team->t.t_nproc; 42120b57cec5SDimitry Andric this_thr->th.th_team_master = master; 42130b57cec5SDimitry Andric this_thr->th.th_team_serialized = team->t.t_serialized; 42140b57cec5SDimitry Andric 42150b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); 42160b57cec5SDimitry Andric 42170b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 42180b57cec5SDimitry Andric tid, gtid, this_thr, this_thr->th.th_current_task)); 42190b57cec5SDimitry Andric 42200b57cec5SDimitry Andric __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, 42210b57cec5SDimitry Andric team, tid, TRUE); 42220b57cec5SDimitry Andric 42230b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 42240b57cec5SDimitry Andric tid, gtid, this_thr, this_thr->th.th_current_task)); 42250b57cec5SDimitry Andric // TODO: Initialize ICVs from parent; GEH - isn't that already done in 42260b57cec5SDimitry Andric // __kmp_initialize_team()? 
42270b57cec5SDimitry Andric 42280b57cec5SDimitry Andric /* TODO no worksharing in speculative threads */ 42290b57cec5SDimitry Andric this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; 42300b57cec5SDimitry Andric 42310b57cec5SDimitry Andric this_thr->th.th_local.this_construct = 0; 42320b57cec5SDimitry Andric 42330b57cec5SDimitry Andric if (!this_thr->th.th_pri_common) { 42340b57cec5SDimitry Andric this_thr->th.th_pri_common = 42350b57cec5SDimitry Andric (struct common_table *)__kmp_allocate(sizeof(struct common_table)); 42360b57cec5SDimitry Andric if (__kmp_storage_map) { 42370b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 42380b57cec5SDimitry Andric gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 42390b57cec5SDimitry Andric sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); 42400b57cec5SDimitry Andric } 42410b57cec5SDimitry Andric this_thr->th.th_pri_head = NULL; 42420b57cec5SDimitry Andric } 42430b57cec5SDimitry Andric 4244fe6060f1SDimitry Andric if (this_thr != master && // Primary thread's CG root is initialized elsewhere 42450b57cec5SDimitry Andric this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set 4246fe6060f1SDimitry Andric // Make new thread's CG root same as primary thread's 42470b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master->th.th_cg_roots); 42480b57cec5SDimitry Andric kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 42490b57cec5SDimitry Andric if (tmp) { 42500b57cec5SDimitry Andric // worker changes CG, need to check if old CG should be freed 42510b57cec5SDimitry Andric int i = tmp->cg_nthreads--; 42520b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads" 42530b57cec5SDimitry Andric " on node %p of thread %p to %d\n", 42540b57cec5SDimitry Andric this_thr, tmp, tmp->cg_root, tmp->cg_nthreads)); 42550b57cec5SDimitry Andric if (i == 1) { 42560b57cec5SDimitry Andric __kmp_free(tmp); // last thread left CG --> free it 42570b57cec5SDimitry Andric } 
42580b57cec5SDimitry Andric } 42590b57cec5SDimitry Andric this_thr->th.th_cg_roots = master->th.th_cg_roots; 42600b57cec5SDimitry Andric // Increment new thread's CG root's counter to add the new thread 42610b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_nthreads++; 42620b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on" 42630b57cec5SDimitry Andric " node %p of thread %p to %d\n", 42640b57cec5SDimitry Andric this_thr, this_thr->th.th_cg_roots, 42650b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_root, 42660b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_nthreads)); 42670b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.thread_limit = 42680b57cec5SDimitry Andric this_thr->th.th_cg_roots->cg_thread_limit; 42690b57cec5SDimitry Andric } 42700b57cec5SDimitry Andric 42710b57cec5SDimitry Andric /* Initialize dynamic dispatch */ 42720b57cec5SDimitry Andric { 42730b57cec5SDimitry Andric volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 42740b57cec5SDimitry Andric // Use team max_nproc since this will never change for the team. 42750b57cec5SDimitry Andric size_t disp_size = 42760b57cec5SDimitry Andric sizeof(dispatch_private_info_t) * 42770b57cec5SDimitry Andric (team->t.t_max_nproc == 1 ? 
1 : __kmp_dispatch_num_buffers); 42780b57cec5SDimitry Andric KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, 42790b57cec5SDimitry Andric team->t.t_max_nproc)); 42800b57cec5SDimitry Andric KMP_ASSERT(dispatch); 42810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 42820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); 42830b57cec5SDimitry Andric 42840b57cec5SDimitry Andric dispatch->th_disp_index = 0; 42850b57cec5SDimitry Andric dispatch->th_doacross_buf_idx = 0; 42860b57cec5SDimitry Andric if (!dispatch->th_disp_buffer) { 42870b57cec5SDimitry Andric dispatch->th_disp_buffer = 42880b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate(disp_size); 42890b57cec5SDimitry Andric 42900b57cec5SDimitry Andric if (__kmp_storage_map) { 42910b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 42920b57cec5SDimitry Andric gtid, &dispatch->th_disp_buffer[0], 42930b57cec5SDimitry Andric &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 42940b57cec5SDimitry Andric ? 
1 42950b57cec5SDimitry Andric : __kmp_dispatch_num_buffers], 4296fe6060f1SDimitry Andric disp_size, 4297fe6060f1SDimitry Andric "th_%d.th_dispatch.th_disp_buffer " 42980b57cec5SDimitry Andric "(team_%d.t_dispatch[%d].th_disp_buffer)", 42990b57cec5SDimitry Andric gtid, team->t.t_id, gtid); 43000b57cec5SDimitry Andric } 43010b57cec5SDimitry Andric } else { 43020b57cec5SDimitry Andric memset(&dispatch->th_disp_buffer[0], '\0', disp_size); 43030b57cec5SDimitry Andric } 43040b57cec5SDimitry Andric 43050b57cec5SDimitry Andric dispatch->th_dispatch_pr_current = 0; 43060b57cec5SDimitry Andric dispatch->th_dispatch_sh_current = 0; 43070b57cec5SDimitry Andric 43080b57cec5SDimitry Andric dispatch->th_deo_fcn = 0; /* ORDERED */ 43090b57cec5SDimitry Andric dispatch->th_dxo_fcn = 0; /* END ORDERED */ 43100b57cec5SDimitry Andric } 43110b57cec5SDimitry Andric 43120b57cec5SDimitry Andric this_thr->th.th_next_pool = NULL; 43130b57cec5SDimitry Andric 43140b57cec5SDimitry Andric if (!this_thr->th.th_task_state_memo_stack) { 43150b57cec5SDimitry Andric size_t i; 43160b57cec5SDimitry Andric this_thr->th.th_task_state_memo_stack = 43170b57cec5SDimitry Andric (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8)); 43180b57cec5SDimitry Andric this_thr->th.th_task_state_top = 0; 43190b57cec5SDimitry Andric this_thr->th.th_task_state_stack_sz = 4; 43200b57cec5SDimitry Andric for (i = 0; i < this_thr->th.th_task_state_stack_sz; 43210b57cec5SDimitry Andric ++i) // zero init the stack 43220b57cec5SDimitry Andric this_thr->th.th_task_state_memo_stack[i] = 0; 43230b57cec5SDimitry Andric } 43240b57cec5SDimitry Andric 43250b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 43260b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 43270b57cec5SDimitry Andric 43280b57cec5SDimitry Andric KMP_MB(); 43290b57cec5SDimitry Andric } 43300b57cec5SDimitry Andric 43310b57cec5SDimitry Andric /* allocate a new thread for the requesting team. 
this is only called from
   within a forkjoin critical section, so no extra locking of the global
   structures is done here.  We first try to recycle a thread from the
   thread pool; if none is available we fork a brand-new worker.  The caller
   must have verified capacity beforehand (asserted below).

   root    -- root under which the serial reserve team is allocated
   team    -- team the new/recycled thread is initialized into
   new_tid -- team-local tid the thread will take

   Returns the (re)initialized kmp_info_t, registered in __kmp_threads[]
   and reflected in the __kmp_nth / __kmp_all_nth counters. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool: pop from the head (LIFO) */
  if (__kmp_thread_pool) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    // Keep the ordered-insert hint valid: it must never point at a thread
    // that has left the pool.
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    __kmp_lock_suspend_mx(new_thr);
    // If the pooled thread was still counted as active, take it out of the
    // pool-active count under its suspend mutex.
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    // Reset the task-state stack for reuse in the new team.
    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread.  We try to do this as early as possible.
  // Double-checked under __kmp_monitor_lock so only one thread creates it.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // The reason is that if the library is loaded/unloaded in a loop with
      // small (parallel) work in between, then there is high probability that
      // monitor thread started after the library shutdown. At shutdown it is
      // too late to cope with the problem, because when the primary thread is
      // in DllMain (process detach) the monitor has no chances to start (it is
      // blocked), and primary thread has no means to inform the monitor that
      // the library has gone, because all the memory which the monitor can
      // access is going to be released/reset.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();

  // Find the first free gtid slot by linear scan.  Hidden-helper threads own
  // gtids [1, __kmp_hidden_helper_threads_num]; regular threads start after
  // that range unless we are currently initializing the hidden helpers.
  {
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race conditions detection on synchronization flags in debug mode
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  // TODO: check if we need to also suppress b_arrived flags
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
                                   // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  // Put every barrier slot into its quiescent initial state.
  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
46200b57cec5SDimitry Andric The hot team code calls this case at every fork barrier, so EPCC barrier 46210b57cec5SDimitry Andric test are extremely sensitive to changes in it, esp. writes to the team 46220b57cec5SDimitry Andric struct, which cause a cache invalidation in all threads. 46230b57cec5SDimitry Andric IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ 46240b57cec5SDimitry Andric static void __kmp_reinitialize_team(kmp_team_t *team, 46250b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 46260b57cec5SDimitry Andric ident_t *loc) { 46270b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 46280b57cec5SDimitry Andric team->t.t_threads[0], team)); 46290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team && new_icvs); 46300b57cec5SDimitry Andric KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); 46310b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 46320b57cec5SDimitry Andric 46330b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4634fe6060f1SDimitry Andric // Copy ICVs to the primary thread's implicit taskdata 46350b57cec5SDimitry Andric __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); 46360b57cec5SDimitry Andric copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 46370b57cec5SDimitry Andric 46380b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 46390b57cec5SDimitry Andric team->t.t_threads[0], team)); 46400b57cec5SDimitry Andric } 46410b57cec5SDimitry Andric 46420b57cec5SDimitry Andric /* Initialize the team data structure. 46430b57cec5SDimitry Andric This assumes the t_threads and t_max_nproc are already set. 
Also, we don't touch the arguments */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  // A one-thread team is marked serialized.
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;
  /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
   * up hot team */

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  // ICV propagation into the primary thread's implicit task is delegated to
  // the (hot-path) reinitialization routine.
  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Sets full mask for thread and returns old mask, no changes to structures.
*/ 47000b57cec5SDimitry Andric static void 47010b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 47020b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 47030b57cec5SDimitry Andric int status; 47040b57cec5SDimitry Andric if (old_mask != NULL) { 47050b57cec5SDimitry Andric status = __kmp_get_system_affinity(old_mask, TRUE); 47060b57cec5SDimitry Andric int error = errno; 47070b57cec5SDimitry Andric if (status != 0) { 47080b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), 47090b57cec5SDimitry Andric __kmp_msg_null); 47100b57cec5SDimitry Andric } 47110b57cec5SDimitry Andric } 47120b57cec5SDimitry Andric __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 47130b57cec5SDimitry Andric } 47140b57cec5SDimitry Andric } 47150b57cec5SDimitry Andric #endif 47160b57cec5SDimitry Andric 47170b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 47180b57cec5SDimitry Andric 47190b57cec5SDimitry Andric // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4720fe6060f1SDimitry Andric // It calculates the worker + primary thread's partition based upon the parent 47210b57cec5SDimitry Andric // thread's partition, and binds each worker to a thread in their partition. 4722fe6060f1SDimitry Andric // The primary thread's partition should already include its current binding. 
47230b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4724fe6060f1SDimitry Andric // Do not partition places for the hidden helper team 4725fe6060f1SDimitry Andric if (KMP_HIDDEN_HELPER_TEAM(team)) 4726fe6060f1SDimitry Andric return; 4727fe6060f1SDimitry Andric // Copy the primary thread's place partition to the team struct 47280b57cec5SDimitry Andric kmp_info_t *master_th = team->t.t_threads[0]; 47290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th != NULL); 47300b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 47310b57cec5SDimitry Andric int first_place = master_th->th.th_first_place; 47320b57cec5SDimitry Andric int last_place = master_th->th.th_last_place; 47330b57cec5SDimitry Andric int masters_place = master_th->th.th_current_place; 47340b57cec5SDimitry Andric team->t.t_first_place = first_place; 47350b57cec5SDimitry Andric team->t.t_last_place = last_place; 47360b57cec5SDimitry Andric 47370b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 47380b57cec5SDimitry Andric "bound to place %d partition = [%d,%d]\n", 47390b57cec5SDimitry Andric proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 47400b57cec5SDimitry Andric team->t.t_id, masters_place, first_place, last_place)); 47410b57cec5SDimitry Andric 47420b57cec5SDimitry Andric switch (proc_bind) { 47430b57cec5SDimitry Andric 47440b57cec5SDimitry Andric case proc_bind_default: 4745fe6060f1SDimitry Andric // Serial teams might have the proc_bind policy set to proc_bind_default. 4746fe6060f1SDimitry Andric // Not an issue -- we don't rebind primary thread for any proc_bind policy. 
47470b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 47480b57cec5SDimitry Andric break; 47490b57cec5SDimitry Andric 4750fe6060f1SDimitry Andric case proc_bind_primary: { 47510b57cec5SDimitry Andric int f; 47520b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47530b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 47540b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47560b57cec5SDimitry Andric th->th.th_first_place = first_place; 47570b57cec5SDimitry Andric th->th.th_last_place = last_place; 47580b57cec5SDimitry Andric th->th.th_new_place = masters_place; 47590b57cec5SDimitry Andric if (__kmp_display_affinity && masters_place != th->th.th_current_place && 47600b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47610b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47620b57cec5SDimitry Andric } 47630b57cec5SDimitry Andric 4764fe6060f1SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d " 47650b57cec5SDimitry Andric "partition = [%d,%d]\n", 47660b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 47670b57cec5SDimitry Andric f, masters_place, first_place, last_place)); 47680b57cec5SDimitry Andric } 47690b57cec5SDimitry Andric } break; 47700b57cec5SDimitry Andric 47710b57cec5SDimitry Andric case proc_bind_close: { 47720b57cec5SDimitry Andric int f; 47730b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47740b57cec5SDimitry Andric int n_places; 47750b57cec5SDimitry Andric if (first_place <= last_place) { 47760b57cec5SDimitry Andric n_places = last_place - first_place + 1; 47770b57cec5SDimitry Andric } else { 47780b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 47790b57cec5SDimitry Andric } 47800b57cec5SDimitry Andric if (n_th <= n_places) { 47810b57cec5SDimitry Andric int place = masters_place; 47820b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 
47830b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47850b57cec5SDimitry Andric 47860b57cec5SDimitry Andric if (place == last_place) { 47870b57cec5SDimitry Andric place = first_place; 47880b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 47890b57cec5SDimitry Andric place = 0; 47900b57cec5SDimitry Andric } else { 47910b57cec5SDimitry Andric place++; 47920b57cec5SDimitry Andric } 47930b57cec5SDimitry Andric th->th.th_first_place = first_place; 47940b57cec5SDimitry Andric th->th.th_last_place = last_place; 47950b57cec5SDimitry Andric th->th.th_new_place = place; 47960b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 47970b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47980b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47990b57cec5SDimitry Andric } 48000b57cec5SDimitry Andric 48010b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 48020b57cec5SDimitry Andric "partition = [%d,%d]\n", 48030b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 48040b57cec5SDimitry Andric team->t.t_id, f, place, first_place, last_place)); 48050b57cec5SDimitry Andric } 48060b57cec5SDimitry Andric } else { 48070b57cec5SDimitry Andric int S, rem, gap, s_count; 48080b57cec5SDimitry Andric S = n_th / n_places; 48090b57cec5SDimitry Andric s_count = 0; 48100b57cec5SDimitry Andric rem = n_th - (S * n_places); 48110b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 48120b57cec5SDimitry Andric int place = masters_place; 48130b57cec5SDimitry Andric int gap_ct = gap; 48140b57cec5SDimitry Andric for (f = 0; f < n_th; f++) { 48150b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48160b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48170b57cec5SDimitry Andric 48180b57cec5SDimitry Andric th->th.th_first_place = first_place; 48190b57cec5SDimitry Andric th->th.th_last_place = last_place; 48200b57cec5SDimitry Andric th->th.th_new_place = place; 48210b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48220b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48230b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48240b57cec5SDimitry Andric } 48250b57cec5SDimitry Andric s_count++; 48260b57cec5SDimitry Andric 48270b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 48280b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 48290b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 48300b57cec5SDimitry Andric // we added an extra thread to this place; move to next place 48310b57cec5SDimitry Andric if (place == last_place) { 48320b57cec5SDimitry Andric place = first_place; 48330b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48340b57cec5SDimitry Andric place = 0; 48350b57cec5SDimitry Andric } else { 48360b57cec5SDimitry Andric place++; 48370b57cec5SDimitry Andric } 48380b57cec5SDimitry Andric s_count = 0; 48390b57cec5SDimitry Andric gap_ct = 1; 48400b57cec5SDimitry Andric rem--; 48410b57cec5SDimitry Andric } else if (s_count == S) { // place full; don't add extra 48420b57cec5SDimitry Andric if (place == last_place) { 48430b57cec5SDimitry Andric place = first_place; 48440b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48450b57cec5SDimitry Andric place = 0; 48460b57cec5SDimitry Andric } else { 
48470b57cec5SDimitry Andric place++; 48480b57cec5SDimitry Andric } 48490b57cec5SDimitry Andric gap_ct++; 48500b57cec5SDimitry Andric s_count = 0; 48510b57cec5SDimitry Andric } 48520b57cec5SDimitry Andric 48530b57cec5SDimitry Andric KA_TRACE(100, 48540b57cec5SDimitry Andric ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 48550b57cec5SDimitry Andric "partition = [%d,%d]\n", 48560b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 48570b57cec5SDimitry Andric th->th.th_new_place, first_place, last_place)); 48580b57cec5SDimitry Andric } 48590b57cec5SDimitry Andric KMP_DEBUG_ASSERT(place == masters_place); 48600b57cec5SDimitry Andric } 48610b57cec5SDimitry Andric } break; 48620b57cec5SDimitry Andric 48630b57cec5SDimitry Andric case proc_bind_spread: { 48640b57cec5SDimitry Andric int f; 48650b57cec5SDimitry Andric int n_th = team->t.t_nproc; 48660b57cec5SDimitry Andric int n_places; 48670b57cec5SDimitry Andric int thidx; 48680b57cec5SDimitry Andric if (first_place <= last_place) { 48690b57cec5SDimitry Andric n_places = last_place - first_place + 1; 48700b57cec5SDimitry Andric } else { 48710b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 48720b57cec5SDimitry Andric } 48730b57cec5SDimitry Andric if (n_th <= n_places) { 48740b57cec5SDimitry Andric int place = -1; 48750b57cec5SDimitry Andric 48760b57cec5SDimitry Andric if (n_places != static_cast<int>(__kmp_affinity_num_masks)) { 48770b57cec5SDimitry Andric int S = n_places / n_th; 48780b57cec5SDimitry Andric int s_count, rem, gap, gap_ct; 48790b57cec5SDimitry Andric 48800b57cec5SDimitry Andric place = masters_place; 48810b57cec5SDimitry Andric rem = n_places - n_th * S; 48820b57cec5SDimitry Andric gap = rem ? 
n_th / rem : 1; 48830b57cec5SDimitry Andric gap_ct = gap; 48840b57cec5SDimitry Andric thidx = n_th; 48850b57cec5SDimitry Andric if (update_master_only == 1) 48860b57cec5SDimitry Andric thidx = 1; 48870b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 48880b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48900b57cec5SDimitry Andric 48910b57cec5SDimitry Andric th->th.th_first_place = place; 48920b57cec5SDimitry Andric th->th.th_new_place = place; 48930b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48940b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48950b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48960b57cec5SDimitry Andric } 48970b57cec5SDimitry Andric s_count = 1; 48980b57cec5SDimitry Andric while (s_count < S) { 48990b57cec5SDimitry Andric if (place == last_place) { 49000b57cec5SDimitry Andric place = first_place; 49010b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49020b57cec5SDimitry Andric place = 0; 49030b57cec5SDimitry Andric } else { 49040b57cec5SDimitry Andric place++; 49050b57cec5SDimitry Andric } 49060b57cec5SDimitry Andric s_count++; 49070b57cec5SDimitry Andric } 49080b57cec5SDimitry Andric if (rem && (gap_ct == gap)) { 49090b57cec5SDimitry Andric if (place == last_place) { 49100b57cec5SDimitry Andric place = first_place; 49110b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49120b57cec5SDimitry Andric place = 0; 49130b57cec5SDimitry Andric } else { 49140b57cec5SDimitry Andric place++; 49150b57cec5SDimitry Andric } 49160b57cec5SDimitry Andric rem--; 49170b57cec5SDimitry Andric gap_ct = 0; 49180b57cec5SDimitry Andric } 49190b57cec5SDimitry Andric th->th.th_last_place = place; 49200b57cec5SDimitry Andric gap_ct++; 49210b57cec5SDimitry Andric 49220b57cec5SDimitry Andric if (place == last_place) { 49230b57cec5SDimitry Andric place = first_place; 
49240b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49250b57cec5SDimitry Andric place = 0; 49260b57cec5SDimitry Andric } else { 49270b57cec5SDimitry Andric place++; 49280b57cec5SDimitry Andric } 49290b57cec5SDimitry Andric 49300b57cec5SDimitry Andric KA_TRACE(100, 49310b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49320b57cec5SDimitry Andric "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", 49330b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 49340b57cec5SDimitry Andric f, th->th.th_new_place, th->th.th_first_place, 49350b57cec5SDimitry Andric th->th.th_last_place, __kmp_affinity_num_masks)); 49360b57cec5SDimitry Andric } 49370b57cec5SDimitry Andric } else { 49380b57cec5SDimitry Andric /* Having uniform space of available computation places I can create 49390b57cec5SDimitry Andric T partitions of round(P/T) size and put threads into the first 49400b57cec5SDimitry Andric place of each partition. 
*/ 49410b57cec5SDimitry Andric double current = static_cast<double>(masters_place); 49420b57cec5SDimitry Andric double spacing = 49430b57cec5SDimitry Andric (static_cast<double>(n_places + 1) / static_cast<double>(n_th)); 49440b57cec5SDimitry Andric int first, last; 49450b57cec5SDimitry Andric kmp_info_t *th; 49460b57cec5SDimitry Andric 49470b57cec5SDimitry Andric thidx = n_th + 1; 49480b57cec5SDimitry Andric if (update_master_only == 1) 49490b57cec5SDimitry Andric thidx = 1; 49500b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 49510b57cec5SDimitry Andric first = static_cast<int>(current); 49520b57cec5SDimitry Andric last = static_cast<int>(current + spacing) - 1; 49530b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last >= first); 49540b57cec5SDimitry Andric if (first >= n_places) { 49550b57cec5SDimitry Andric if (masters_place) { 49560b57cec5SDimitry Andric first -= n_places; 49570b57cec5SDimitry Andric last -= n_places; 49580b57cec5SDimitry Andric if (first == (masters_place + 1)) { 49590b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49600b57cec5SDimitry Andric first--; 49610b57cec5SDimitry Andric } 49620b57cec5SDimitry Andric if (last == masters_place) { 49630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == (n_th - 1)); 49640b57cec5SDimitry Andric last--; 49650b57cec5SDimitry Andric } 49660b57cec5SDimitry Andric } else { 49670b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49680b57cec5SDimitry Andric first = 0; 49690b57cec5SDimitry Andric last = 0; 49700b57cec5SDimitry Andric } 49710b57cec5SDimitry Andric } 49720b57cec5SDimitry Andric if (last >= n_places) { 49730b57cec5SDimitry Andric last = (n_places - 1); 49740b57cec5SDimitry Andric } 49750b57cec5SDimitry Andric place = first; 49760b57cec5SDimitry Andric current += spacing; 49770b57cec5SDimitry Andric if (f < n_th) { 49780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= first); 49790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > first); 49800b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= last); 
49810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > last); 49820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last_place >= first_place); 49830b57cec5SDimitry Andric th = team->t.t_threads[f]; 49840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th); 49850b57cec5SDimitry Andric th->th.th_first_place = first; 49860b57cec5SDimitry Andric th->th.th_new_place = place; 49870b57cec5SDimitry Andric th->th.th_last_place = last; 49880b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 49890b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 49900b57cec5SDimitry Andric team->t.t_display_affinity = 1; 49910b57cec5SDimitry Andric } 49920b57cec5SDimitry Andric KA_TRACE(100, 49930b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49940b57cec5SDimitry Andric "partition = [%d,%d], spacing = %.4f\n", 49950b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 49960b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 49970b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place, spacing)); 49980b57cec5SDimitry Andric } 49990b57cec5SDimitry Andric } 50000b57cec5SDimitry Andric } 50010b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 50020b57cec5SDimitry Andric } else { 50030b57cec5SDimitry Andric int S, rem, gap, s_count; 50040b57cec5SDimitry Andric S = n_th / n_places; 50050b57cec5SDimitry Andric s_count = 0; 50060b57cec5SDimitry Andric rem = n_th - (S * n_places); 50070b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 50080b57cec5SDimitry Andric int place = masters_place; 50090b57cec5SDimitry Andric int gap_ct = gap; 50100b57cec5SDimitry Andric thidx = n_th; 50110b57cec5SDimitry Andric if (update_master_only == 1) 50120b57cec5SDimitry Andric thidx = 1; 50130b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 50140b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 50150b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 50160b57cec5SDimitry Andric 50170b57cec5SDimitry Andric th->th.th_first_place = place; 50180b57cec5SDimitry Andric th->th.th_last_place = place; 50190b57cec5SDimitry Andric th->th.th_new_place = place; 50200b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 50210b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 50220b57cec5SDimitry Andric team->t.t_display_affinity = 1; 50230b57cec5SDimitry Andric } 50240b57cec5SDimitry Andric s_count++; 50250b57cec5SDimitry Andric 50260b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 50270b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 50280b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 50290b57cec5SDimitry Andric // we added an extra thread to this place; move on to next place 50300b57cec5SDimitry Andric if (place == last_place) { 50310b57cec5SDimitry Andric place = first_place; 50320b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 50330b57cec5SDimitry Andric place = 0; 50340b57cec5SDimitry Andric } else { 50350b57cec5SDimitry Andric place++; 50360b57cec5SDimitry Andric } 50370b57cec5SDimitry Andric s_count = 0; 50380b57cec5SDimitry Andric gap_ct = 1; 50390b57cec5SDimitry Andric rem--; 50400b57cec5SDimitry Andric } else if (s_count == S) { // place is full; don't add extra thread 50410b57cec5SDimitry Andric if (place == last_place) { 50420b57cec5SDimitry Andric place = first_place; 50430b57cec5SDimitry Andric } else if 
(place == (int)(__kmp_affinity_num_masks - 1)) { 50440b57cec5SDimitry Andric place = 0; 50450b57cec5SDimitry Andric } else { 50460b57cec5SDimitry Andric place++; 50470b57cec5SDimitry Andric } 50480b57cec5SDimitry Andric gap_ct++; 50490b57cec5SDimitry Andric s_count = 0; 50500b57cec5SDimitry Andric } 50510b57cec5SDimitry Andric 50520b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 50530b57cec5SDimitry Andric "partition = [%d,%d]\n", 50540b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 50550b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 50560b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place)); 50570b57cec5SDimitry Andric } 50580b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 50590b57cec5SDimitry Andric } 50600b57cec5SDimitry Andric } break; 50610b57cec5SDimitry Andric 50620b57cec5SDimitry Andric default: 50630b57cec5SDimitry Andric break; 50640b57cec5SDimitry Andric } 50650b57cec5SDimitry Andric 50660b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 50670b57cec5SDimitry Andric } 50680b57cec5SDimitry Andric 50690b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 50700b57cec5SDimitry Andric 50710b57cec5SDimitry Andric /* allocate a new team data structure to use. 
take one off of the free pool if 50720b57cec5SDimitry Andric available */ 50730b57cec5SDimitry Andric kmp_team_t * 50740b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 50750b57cec5SDimitry Andric #if OMPT_SUPPORT 50760b57cec5SDimitry Andric ompt_data_t ompt_parallel_data, 50770b57cec5SDimitry Andric #endif 50780b57cec5SDimitry Andric kmp_proc_bind_t new_proc_bind, 50790b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 50800b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 50810b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 50820b57cec5SDimitry Andric int f; 50830b57cec5SDimitry Andric kmp_team_t *team; 50840b57cec5SDimitry Andric int use_hot_team = !root->r.r_active; 50850b57cec5SDimitry Andric int level = 0; 5086349cc55cSDimitry Andric int do_place_partition = 1; 50870b57cec5SDimitry Andric 50880b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: called\n")); 50890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 50900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 50910b57cec5SDimitry Andric KMP_MB(); 50920b57cec5SDimitry Andric 50930b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 50940b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams; 50950b57cec5SDimitry Andric if (master) { 50960b57cec5SDimitry Andric team = master->th.th_team; 50970b57cec5SDimitry Andric level = team->t.t_active_level; 50980b57cec5SDimitry Andric if (master->th.th_teams_microtask) { // in teams construct? 
50990b57cec5SDimitry Andric if (master->th.th_teams_size.nteams > 1 && 51000b57cec5SDimitry Andric ( // #teams > 1 51010b57cec5SDimitry Andric team->t.t_pkfn == 51020b57cec5SDimitry Andric (microtask_t)__kmp_teams_master || // inner fork of the teams 51030b57cec5SDimitry Andric master->th.th_teams_level < 51040b57cec5SDimitry Andric team->t.t_level)) { // or nested parallel inside the teams 51050b57cec5SDimitry Andric ++level; // not increment if #teams==1, or for outer fork of the teams; 51060b57cec5SDimitry Andric // increment otherwise 51070b57cec5SDimitry Andric } 5108349cc55cSDimitry Andric // Do not perform the place partition if inner fork of the teams 5109349cc55cSDimitry Andric // Wait until nested parallel region encountered inside teams construct 5110349cc55cSDimitry Andric if ((master->th.th_teams_size.nteams == 1 && 5111349cc55cSDimitry Andric master->th.th_teams_level >= team->t.t_level) || 5112349cc55cSDimitry Andric (team->t.t_pkfn == (microtask_t)__kmp_teams_master)) 5113349cc55cSDimitry Andric do_place_partition = 0; 51140b57cec5SDimitry Andric } 51150b57cec5SDimitry Andric hot_teams = master->th.th_hot_teams; 51160b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level && hot_teams && 5117e8d8bef9SDimitry Andric hot_teams[level].hot_team) { 5118e8d8bef9SDimitry Andric // hot team has already been allocated for given level 51190b57cec5SDimitry Andric use_hot_team = 1; 51200b57cec5SDimitry Andric } else { 51210b57cec5SDimitry Andric use_hot_team = 0; 51220b57cec5SDimitry Andric } 5123e8d8bef9SDimitry Andric } else { 5124e8d8bef9SDimitry Andric // check we won't access uninitialized hot_teams, just in case 5125e8d8bef9SDimitry Andric KMP_DEBUG_ASSERT(new_nproc == 1); 51260b57cec5SDimitry Andric } 51270b57cec5SDimitry Andric #endif 51280b57cec5SDimitry Andric // Optimization to use a "hot" team 51290b57cec5SDimitry Andric if (use_hot_team && new_nproc > 1) { 51300b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc <= max_nproc); 
51310b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51320b57cec5SDimitry Andric team = hot_teams[level].hot_team; 51330b57cec5SDimitry Andric #else 51340b57cec5SDimitry Andric team = root->r.r_hot_team; 51350b57cec5SDimitry Andric #endif 51360b57cec5SDimitry Andric #if KMP_DEBUG 51370b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 51380b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 51390b57cec5SDimitry Andric "task_team[1] = %p before reinit\n", 51400b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 51410b57cec5SDimitry Andric } 51420b57cec5SDimitry Andric #endif 51430b57cec5SDimitry Andric 5144349cc55cSDimitry Andric if (team->t.t_nproc != new_nproc && 5145349cc55cSDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5146349cc55cSDimitry Andric // Distributed barrier may need a resize 5147349cc55cSDimitry Andric int old_nthr = team->t.t_nproc; 5148349cc55cSDimitry Andric __kmp_resize_dist_barrier(team, old_nthr, new_nproc); 5149349cc55cSDimitry Andric } 5150349cc55cSDimitry Andric 5151349cc55cSDimitry Andric // If not doing the place partition, then reset the team's proc bind 5152349cc55cSDimitry Andric // to indicate that partitioning of all threads still needs to take place 5153349cc55cSDimitry Andric if (do_place_partition == 0) 5154349cc55cSDimitry Andric team->t.t_proc_bind = proc_bind_default; 51550b57cec5SDimitry Andric // Has the number of threads changed? 51560b57cec5SDimitry Andric /* Let's assume the most common case is that the number of threads is 51570b57cec5SDimitry Andric unchanged, and put that case first. 
*/ 51580b57cec5SDimitry Andric if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 51590b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 51600b57cec5SDimitry Andric // This case can mean that omp_set_num_threads() was called and the hot 51610b57cec5SDimitry Andric // team size was already reduced, so we check the special flag 51620b57cec5SDimitry Andric if (team->t.t_size_changed == -1) { 51630b57cec5SDimitry Andric team->t.t_size_changed = 1; 51640b57cec5SDimitry Andric } else { 51650b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 51660b57cec5SDimitry Andric } 51670b57cec5SDimitry Andric 51680b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 51690b57cec5SDimitry Andric kmp_r_sched_t new_sched = new_icvs->sched; 5170fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 51710b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 51720b57cec5SDimitry Andric 51730b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, 51740b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 51750b57cec5SDimitry Andric 51760b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 51770b57cec5SDimitry Andric team->t.t_threads[0], team)); 51780b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 51790b57cec5SDimitry Andric 51800b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 51810b57cec5SDimitry Andric if ((team->t.t_size_changed == 0) && 51820b57cec5SDimitry Andric (team->t.t_proc_bind == new_proc_bind)) { 51830b57cec5SDimitry Andric if (new_proc_bind == proc_bind_spread) { 5184349cc55cSDimitry Andric if (do_place_partition) { 5185349cc55cSDimitry Andric // add flag to update only master for spread 5186349cc55cSDimitry Andric __kmp_partition_places(team, 1); 5187349cc55cSDimitry Andric } 51880b57cec5SDimitry Andric } 
51890b57cec5SDimitry Andric KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 51900b57cec5SDimitry Andric "proc_bind = %d, partition = [%d,%d]\n", 51910b57cec5SDimitry Andric team->t.t_id, new_proc_bind, team->t.t_first_place, 51920b57cec5SDimitry Andric team->t.t_last_place)); 51930b57cec5SDimitry Andric } else { 5194349cc55cSDimitry Andric if (do_place_partition) { 51950b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 51960b57cec5SDimitry Andric __kmp_partition_places(team); 51970b57cec5SDimitry Andric } 5198349cc55cSDimitry Andric } 51990b57cec5SDimitry Andric #else 52000b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 52010b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 52020b57cec5SDimitry Andric } else if (team->t.t_nproc > new_nproc) { 52030b57cec5SDimitry Andric KA_TRACE(20, 52040b57cec5SDimitry Andric ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 52050b57cec5SDimitry Andric new_nproc)); 52060b57cec5SDimitry Andric 52070b57cec5SDimitry Andric team->t.t_size_changed = 1; 5208349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5209349cc55cSDimitry Andric // Barrier size already reduced earlier in this function 5210349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5211349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5212349cc55cSDimitry Andric } 52130b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52140b57cec5SDimitry Andric if (__kmp_hot_teams_mode == 0) { 52150b57cec5SDimitry Andric // AC: saved number of threads should correspond to team's value in this 52160b57cec5SDimitry Andric // mode, can be bigger in mode 1, when hot team has threads in reserve 52170b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 52180b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; 52190b57cec5SDimitry Andric #endif // 
KMP_NESTED_HOT_TEAMS 52200b57cec5SDimitry Andric /* release the extra threads we don't need any more */ 52210b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; f++) { 52220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52230b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 52240b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should 52250b57cec5SDimitry Andric // unref task team. 52260b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_team = NULL; 52270b57cec5SDimitry Andric } 52280b57cec5SDimitry Andric __kmp_free_thread(team->t.t_threads[f]); 52290b57cec5SDimitry Andric team->t.t_threads[f] = NULL; 52300b57cec5SDimitry Andric } 52310b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52320b57cec5SDimitry Andric } // (__kmp_hot_teams_mode == 0) 52330b57cec5SDimitry Andric else { 52340b57cec5SDimitry Andric // When keeping extra threads in team, switch threads to wait on own 52350b57cec5SDimitry Andric // b_go flag 52360b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; ++f) { 52370b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 52380b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 52390b57cec5SDimitry Andric for (int b = 0; b < bs_last_barrier; ++b) { 52400b57cec5SDimitry Andric if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 52410b57cec5SDimitry Andric balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 52420b57cec5SDimitry Andric } 52430b57cec5SDimitry Andric KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 52440b57cec5SDimitry Andric } 52450b57cec5SDimitry Andric } 52460b57cec5SDimitry Andric } 52470b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 52480b57cec5SDimitry Andric team->t.t_nproc = new_nproc; 52490b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 52500b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); 52510b57cec5SDimitry 
Andric __kmp_reinitialize_team(team, new_icvs, 52520b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 52530b57cec5SDimitry Andric 52540b57cec5SDimitry Andric // Update remaining threads 52550b57cec5SDimitry Andric for (f = 0; f < new_nproc; ++f) { 52560b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc = new_nproc; 52570b57cec5SDimitry Andric } 52580b57cec5SDimitry Andric 5259fe6060f1SDimitry Andric // restore the current task state of the primary thread: should be the 52600b57cec5SDimitry Andric // implicit task 52610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 52620b57cec5SDimitry Andric team->t.t_threads[0], team)); 52630b57cec5SDimitry Andric 52640b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 52650b57cec5SDimitry Andric 52660b57cec5SDimitry Andric #ifdef KMP_DEBUG 52670b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 52680b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 52690b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 52700b57cec5SDimitry Andric team->t.t_nproc); 52710b57cec5SDimitry Andric } 52720b57cec5SDimitry Andric #endif 52730b57cec5SDimitry Andric 5274349cc55cSDimitry Andric if (do_place_partition) { 52750b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 52760b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 52770b57cec5SDimitry Andric __kmp_partition_places(team); 52780b57cec5SDimitry Andric #endif 5279349cc55cSDimitry Andric } 52800b57cec5SDimitry Andric } else { // team->t.t_nproc < new_nproc 5281489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 52820b57cec5SDimitry Andric kmp_affin_mask_t *old_mask; 52830b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 52840b57cec5SDimitry Andric KMP_CPU_ALLOC(old_mask); 52850b57cec5SDimitry Andric } 52860b57cec5SDimitry Andric #endif 52870b57cec5SDimitry Andric 52880b57cec5SDimitry Andric 
KA_TRACE(20, 52890b57cec5SDimitry Andric ("__kmp_allocate_team: increasing hot team thread count to %d\n", 52900b57cec5SDimitry Andric new_nproc)); 5291349cc55cSDimitry Andric int old_nproc = team->t.t_nproc; // save old value and use to update only 52920b57cec5SDimitry Andric team->t.t_size_changed = 1; 52930b57cec5SDimitry Andric 52940b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 52950b57cec5SDimitry Andric int avail_threads = hot_teams[level].hot_team_nth; 52960b57cec5SDimitry Andric if (new_nproc < avail_threads) 52970b57cec5SDimitry Andric avail_threads = new_nproc; 52980b57cec5SDimitry Andric kmp_info_t **other_threads = team->t.t_threads; 52990b57cec5SDimitry Andric for (f = team->t.t_nproc; f < avail_threads; ++f) { 53000b57cec5SDimitry Andric // Adjust barrier data of reserved threads (if any) of the team 53010b57cec5SDimitry Andric // Other data will be set in __kmp_initialize_info() below. 53020b57cec5SDimitry Andric int b; 53030b57cec5SDimitry Andric kmp_balign_t *balign = other_threads[f]->th.th_bar; 53040b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53050b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 53060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 53070b57cec5SDimitry Andric #if USE_DEBUGGER 53080b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 53090b57cec5SDimitry Andric #endif 53100b57cec5SDimitry Andric } 53110b57cec5SDimitry Andric } 53120b57cec5SDimitry Andric if (hot_teams[level].hot_team_nth >= new_nproc) { 53130b57cec5SDimitry Andric // we have all needed threads in reserve, no need to allocate any 53140b57cec5SDimitry Andric // this only possible in mode 1, cannot have reserved threads in mode 0 53150b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 53160b57cec5SDimitry Andric team->t.t_nproc = new_nproc; // just get reserved threads involved 53170b57cec5SDimitry Andric } else { 
5318349cc55cSDimitry Andric // We may have some threads in reserve, but not enough; 5319349cc55cSDimitry Andric // get reserved threads involved if any. 5320349cc55cSDimitry Andric team->t.t_nproc = hot_teams[level].hot_team_nth; 53210b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 53220b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 53230b57cec5SDimitry Andric if (team->t.t_max_nproc < new_nproc) { 53240b57cec5SDimitry Andric /* reallocate larger arrays */ 53250b57cec5SDimitry Andric __kmp_reallocate_team_arrays(team, new_nproc); 53260b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, NULL); 53270b57cec5SDimitry Andric } 53280b57cec5SDimitry Andric 5329489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5330fe6060f1SDimitry Andric /* Temporarily set full mask for primary thread before creation of 5331fe6060f1SDimitry Andric workers. The reason is that workers inherit the affinity from the 5332fe6060f1SDimitry Andric primary thread, so if a lot of workers are created on the single 5333fe6060f1SDimitry Andric core quickly, they don't get a chance to set their own affinity for 5334fe6060f1SDimitry Andric a long time. 
*/ 53350b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(old_mask); 53360b57cec5SDimitry Andric #endif 53370b57cec5SDimitry Andric 53380b57cec5SDimitry Andric /* allocate new threads for the hot team */ 53390b57cec5SDimitry Andric for (f = team->t.t_nproc; f < new_nproc; f++) { 53400b57cec5SDimitry Andric kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 53410b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_worker); 53420b57cec5SDimitry Andric team->t.t_threads[f] = new_worker; 53430b57cec5SDimitry Andric 53440b57cec5SDimitry Andric KA_TRACE(20, 53450b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init T#%d arrived: " 53460b57cec5SDimitry Andric "join=%llu, plain=%llu\n", 53470b57cec5SDimitry Andric team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 53480b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 53490b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 53500b57cec5SDimitry Andric 53510b57cec5SDimitry Andric { // Initialize barrier data for new threads. 
53520b57cec5SDimitry Andric int b; 53530b57cec5SDimitry Andric kmp_balign_t *balign = new_worker->th.th_bar; 53540b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53550b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 53560b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 53570b57cec5SDimitry Andric KMP_BARRIER_PARENT_FLAG); 53580b57cec5SDimitry Andric #if USE_DEBUGGER 53590b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 53600b57cec5SDimitry Andric #endif 53610b57cec5SDimitry Andric } 53620b57cec5SDimitry Andric } 53630b57cec5SDimitry Andric } 53640b57cec5SDimitry Andric 5365489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 53660b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 5367fe6060f1SDimitry Andric /* Restore initial primary thread's affinity mask */ 53680b57cec5SDimitry Andric __kmp_set_system_affinity(old_mask, TRUE); 53690b57cec5SDimitry Andric KMP_CPU_FREE(old_mask); 53700b57cec5SDimitry Andric } 53710b57cec5SDimitry Andric #endif 53720b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 53730b57cec5SDimitry Andric } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 53740b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 5375349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5376349cc55cSDimitry Andric // Barrier size already increased earlier in this function 5377349cc55cSDimitry Andric // Activate team threads via th_used_in_team 5378349cc55cSDimitry Andric __kmp_add_threads_to_team(team, new_nproc); 5379349cc55cSDimitry Andric } 53800b57cec5SDimitry Andric /* make sure everyone is syncronized */ 53810b57cec5SDimitry Andric // new threads below 53820b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, 53830b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 53840b57cec5SDimitry Andric 53850b57cec5SDimitry Andric /* reinitialize the threads */ 53860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 53870b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) 53880b57cec5SDimitry Andric __kmp_initialize_info(team->t.t_threads[f], team, f, 53890b57cec5SDimitry Andric __kmp_gtid_from_tid(f, team)); 53900b57cec5SDimitry Andric 53910b57cec5SDimitry Andric if (level) { // set th_task_state for new threads in nested hot team 53920b57cec5SDimitry Andric // __kmp_initialize_info() no longer zeroes th_task_state, so we should 53930b57cec5SDimitry Andric // only need to set the th_task_state for the new threads. th_task_state 5394fe6060f1SDimitry Andric // for primary thread will not be accurate until after this in 5395fe6060f1SDimitry Andric // __kmp_fork_call(), so we look to the primary thread's memo_stack to 5396fe6060f1SDimitry Andric // get the correct value. 
53970b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 53980b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = 53990b57cec5SDimitry Andric team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 54000b57cec5SDimitry Andric } else { // set th_task_state for new threads in non-nested hot team 5401fe6060f1SDimitry Andric // copy primary thread's state 5402fe6060f1SDimitry Andric kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state; 54030b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 54040b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = old_state; 54050b57cec5SDimitry Andric } 54060b57cec5SDimitry Andric 54070b57cec5SDimitry Andric #ifdef KMP_DEBUG 54080b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) { 54090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 54100b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 54110b57cec5SDimitry Andric team->t.t_nproc); 54120b57cec5SDimitry Andric } 54130b57cec5SDimitry Andric #endif 54140b57cec5SDimitry Andric 5415349cc55cSDimitry Andric if (do_place_partition) { 54160b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 54170b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 54180b57cec5SDimitry Andric __kmp_partition_places(team); 54190b57cec5SDimitry Andric #endif 5420349cc55cSDimitry Andric } 54210b57cec5SDimitry Andric } // Check changes in number of threads 54220b57cec5SDimitry Andric 54230b57cec5SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 54240b57cec5SDimitry Andric if (master->th.th_teams_microtask) { 54250b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54260b57cec5SDimitry Andric // propagate teams construct specific info to workers 54270b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54280b57cec5SDimitry Andric thr->th.th_teams_microtask = master->th.th_teams_microtask; 54290b57cec5SDimitry Andric thr->th.th_teams_level = 
master->th.th_teams_level; 54300b57cec5SDimitry Andric thr->th.th_teams_size = master->th.th_teams_size; 54310b57cec5SDimitry Andric } 54320b57cec5SDimitry Andric } 54330b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 54340b57cec5SDimitry Andric if (level) { 54350b57cec5SDimitry Andric // Sync barrier state for nested hot teams, not needed for outermost hot 54360b57cec5SDimitry Andric // team. 54370b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 54380b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54390b57cec5SDimitry Andric int b; 54400b57cec5SDimitry Andric kmp_balign_t *balign = thr->th.th_bar; 54410b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 54420b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 54430b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 54440b57cec5SDimitry Andric #if USE_DEBUGGER 54450b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 54460b57cec5SDimitry Andric #endif 54470b57cec5SDimitry Andric } 54480b57cec5SDimitry Andric } 54490b57cec5SDimitry Andric } 54500b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 54510b57cec5SDimitry Andric 54520b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 54530b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 54540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 54550b57cec5SDimitry Andric // The hot team re-uses the previous task team, 54560b57cec5SDimitry Andric // if untouched during the previous release->gather phase. 
54570b57cec5SDimitry Andric 54580b57cec5SDimitry Andric KF_TRACE(10, (" hot_team = %p\n", team)); 54590b57cec5SDimitry Andric 54600b57cec5SDimitry Andric #if KMP_DEBUG 54610b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 54620b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 54630b57cec5SDimitry Andric "task_team[1] = %p after reinit\n", 54640b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 54650b57cec5SDimitry Andric } 54660b57cec5SDimitry Andric #endif 54670b57cec5SDimitry Andric 54680b57cec5SDimitry Andric #if OMPT_SUPPORT 54690b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 54700b57cec5SDimitry Andric #endif 54710b57cec5SDimitry Andric 54720b57cec5SDimitry Andric KMP_MB(); 54730b57cec5SDimitry Andric 54740b57cec5SDimitry Andric return team; 54750b57cec5SDimitry Andric } 54760b57cec5SDimitry Andric 54770b57cec5SDimitry Andric /* next, let's try to take one from the team pool */ 54780b57cec5SDimitry Andric KMP_MB(); 54790b57cec5SDimitry Andric for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) { 54800b57cec5SDimitry Andric /* TODO: consider resizing undersized teams instead of reaping them, now 54810b57cec5SDimitry Andric that we have a resizing mechanism */ 54820b57cec5SDimitry Andric if (team->t.t_max_nproc >= max_nproc) { 54830b57cec5SDimitry Andric /* take this team from the team pool */ 54840b57cec5SDimitry Andric __kmp_team_pool = team->t.t_next_pool; 54850b57cec5SDimitry Andric 5486349cc55cSDimitry Andric if (max_nproc > 1 && 5487349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5488349cc55cSDimitry Andric if (!team->t.b) { // Allocate barrier structure 5489349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5490349cc55cSDimitry Andric } 5491349cc55cSDimitry Andric } 5492349cc55cSDimitry Andric 54930b57cec5SDimitry Andric /* setup the team for fresh use */ 
54940b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 54950b57cec5SDimitry Andric 54960b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 54970b57cec5SDimitry Andric "task_team[1] %p to NULL\n", 54980b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 54990b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; 55000b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; 55010b57cec5SDimitry Andric 55020b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 55030b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 55040b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 55050b57cec5SDimitry Andric 55060b57cec5SDimitry Andric KA_TRACE( 55070b57cec5SDimitry Andric 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 55080b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 55090b57cec5SDimitry Andric { // Initialize barrier data. 
55100b57cec5SDimitry Andric int b; 55110b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55120b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 55130b57cec5SDimitry Andric #if USE_DEBUGGER 55140b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 55150b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 55160b57cec5SDimitry Andric #endif 55170b57cec5SDimitry Andric } 55180b57cec5SDimitry Andric } 55190b57cec5SDimitry Andric 55200b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 55210b57cec5SDimitry Andric 55220b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 55230b57cec5SDimitry Andric team->t.t_id)); 55240b57cec5SDimitry Andric 55250b57cec5SDimitry Andric #if OMPT_SUPPORT 55260b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55270b57cec5SDimitry Andric #endif 55280b57cec5SDimitry Andric 55290b57cec5SDimitry Andric KMP_MB(); 55300b57cec5SDimitry Andric 55310b57cec5SDimitry Andric return team; 55320b57cec5SDimitry Andric } 55330b57cec5SDimitry Andric 55340b57cec5SDimitry Andric /* reap team if it is too small, then loop back and check the next one */ 55350b57cec5SDimitry Andric // not sure if this is wise, but, will be redone during the hot-teams 55360b57cec5SDimitry Andric // rewrite. 55370b57cec5SDimitry Andric /* TODO: Use technique to find the right size hot-team, don't reap them */ 55380b57cec5SDimitry Andric team = __kmp_reap_team(team); 55390b57cec5SDimitry Andric __kmp_team_pool = team; 55400b57cec5SDimitry Andric } 55410b57cec5SDimitry Andric 55420b57cec5SDimitry Andric /* nothing available in the pool, no matter, make a new team! 
*/ 55430b57cec5SDimitry Andric KMP_MB(); 55440b57cec5SDimitry Andric team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 55450b57cec5SDimitry Andric 55460b57cec5SDimitry Andric /* and set it up */ 55470b57cec5SDimitry Andric team->t.t_max_nproc = max_nproc; 5548349cc55cSDimitry Andric if (max_nproc > 1 && 5549349cc55cSDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 5550349cc55cSDimitry Andric // Allocate barrier structure 5551349cc55cSDimitry Andric team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); 5552349cc55cSDimitry Andric } 5553349cc55cSDimitry Andric 55540b57cec5SDimitry Andric /* NOTE well, for some reason allocating one big buffer and dividing it up 55550b57cec5SDimitry Andric seems to really hurt performance a lot on the P4, so, let's not use this */ 55560b57cec5SDimitry Andric __kmp_allocate_team_arrays(team, max_nproc); 55570b57cec5SDimitry Andric 55580b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 55590b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 55600b57cec5SDimitry Andric 55610b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 55620b57cec5SDimitry Andric "%p to NULL\n", 55630b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 55640b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 55650b57cec5SDimitry Andric // memory, no need to duplicate 55660b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 55670b57cec5SDimitry Andric // memory, no need to duplicate 55680b57cec5SDimitry Andric 55690b57cec5SDimitry Andric if (__kmp_storage_map) { 55700b57cec5SDimitry Andric __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 55710b57cec5SDimitry Andric } 55720b57cec5SDimitry Andric 55730b57cec5SDimitry Andric /* allocate space for arguments */ 
55740b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, FALSE); 55750b57cec5SDimitry Andric team->t.t_argc = argc; 55760b57cec5SDimitry Andric 55770b57cec5SDimitry Andric KA_TRACE(20, 55780b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 55790b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 55800b57cec5SDimitry Andric { // Initialize barrier data. 55810b57cec5SDimitry Andric int b; 55820b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55830b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 55840b57cec5SDimitry Andric #if USE_DEBUGGER 55850b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 55860b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 55870b57cec5SDimitry Andric #endif 55880b57cec5SDimitry Andric } 55890b57cec5SDimitry Andric } 55900b57cec5SDimitry Andric 55910b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 55920b57cec5SDimitry Andric 55930b57cec5SDimitry Andric #if OMPT_SUPPORT 55940b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 55950b57cec5SDimitry Andric team->t.ompt_serialized_team_info = NULL; 55960b57cec5SDimitry Andric #endif 55970b57cec5SDimitry Andric 55980b57cec5SDimitry Andric KMP_MB(); 55990b57cec5SDimitry Andric 56000b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 56010b57cec5SDimitry Andric team->t.t_id)); 56020b57cec5SDimitry Andric 56030b57cec5SDimitry Andric return team; 56040b57cec5SDimitry Andric } 56050b57cec5SDimitry Andric 56060b57cec5SDimitry Andric /* TODO implement hot-teams at all levels */ 56070b57cec5SDimitry Andric /* TODO implement lazy thread release on demand (disband request) */ 56080b57cec5SDimitry Andric 56090b57cec5SDimitry Andric /* free the team. return it to the team pool. 
   release all the threads
 * associated with it.
 *
 * Hot teams (the root's reusable r_hot_team and, with KMP_NESTED_HOT_TEAMS,
 * nested hot teams tracked in master->th.th_hot_teams) are NOT disbanded:
 * their threads stay attached so the team can be reused by the next parallel
 * region.  All other teams have their task teams deactivated, their worker
 * threads returned to the thread pool, and the team descriptor pushed onto
 * __kmp_team_pool. */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  // The root's hot team is kept alive for reuse rather than disbanded.
  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  if (master) {
    // Reconstruct the nesting level this team lives at so it can be matched
    // against the nested hot-team table.
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
#if KMP_DEBUG
    // Only referenced by the KMP_DEBUG_ASSERT below, which compiles away in
    // release builds.
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
#endif
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library.
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping; if so, wake it up so it can
          // make progress toward KMP_SAFE_TO_REAP
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // NOTE(review): 1->2 appears to mark the thread as being removed from
        // the team for the distributed barrier protocol (0/3 states are used
        // in __kmp_reap_thread) -- confirm against the fork-barrier code.
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    // Clear thread slots only after all workers have fully detached above.
    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB(); // full fence: publish all of the above before the team is reused
}

/* reap the team.
destroy it, reclaim all its resources and free its memory */ 57840b57cec5SDimitry Andric kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 57850b57cec5SDimitry Andric kmp_team_t *next_pool = team->t.t_next_pool; 57860b57cec5SDimitry Andric 57870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 57880b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 57890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 57900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 57910b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_argv); 57920b57cec5SDimitry Andric 57930b57cec5SDimitry Andric /* TODO clean the threads that are a part of this? */ 57940b57cec5SDimitry Andric 57950b57cec5SDimitry Andric /* free stuff */ 57960b57cec5SDimitry Andric __kmp_free_team_arrays(team); 57970b57cec5SDimitry Andric if (team->t.t_argv != &team->t.t_inline_argv[0]) 57980b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 57990b57cec5SDimitry Andric __kmp_free(team); 58000b57cec5SDimitry Andric 58010b57cec5SDimitry Andric KMP_MB(); 58020b57cec5SDimitry Andric return next_pool; 58030b57cec5SDimitry Andric } 58040b57cec5SDimitry Andric 58050b57cec5SDimitry Andric // Free the thread. Don't reap it, just place it on the pool of available 58060b57cec5SDimitry Andric // threads. 58070b57cec5SDimitry Andric // 58080b57cec5SDimitry Andric // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 58090b57cec5SDimitry Andric // binding for the affinity mechanism to be useful. 58100b57cec5SDimitry Andric // 58110b57cec5SDimitry Andric // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 58120b57cec5SDimitry Andric // However, we want to avoid a potential performance problem by always 58130b57cec5SDimitry Andric // scanning through the list to find the correct point at which to insert 58140b57cec5SDimitry Andric // the thread (potential N**2 behavior). 
// To do this we keep track of the
// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
// With single-level parallelism, threads will always be added to the tail
// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
// parallelism, all bets are off and we may need to scan through the entire
// free list.
//
// This change also has a potentially large performance benefit, for some
// applications. Previously, as threads were freed from the hot team, they
// would be placed back on the free list in inverse order. If the hot team
// grew back to its original size, then the freed thread would be placed
// back on the hot team in reverse order. This could cause bad cache
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
// Now, for single-level parallelism, the OMP tid is always == gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  // Unwind the contention-group (CG) root chain this thread belongs to,
  // decrementing membership counts and freeing CG nodes that become empty.
  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      // A worker only belongs to one CG; stop after detaching from it.
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads
   * -> multiple threads can share the data and try to free the task at
   * __kmp_reap_thread at exit. This duplicate use of the task data can happen
   * with higher probability when hot team is disabled but can occur even when
   * the hot team is enabled */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.
  //
  // In the absence of nested parallelism, the for loop will have 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  // Sorted-by-gtid invariant of the free list must still hold.
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}

/* ------------------------------------------------------------------------ */

/* Outer loop of a worker thread: wait at the fork barrier for work, invoke
   the assigned team's microtask when one arrives, then wait at the join
   barrier; repeat until global shutdown is signalled via
   __kmp_global.g.g_done.  Returns this_thr (usable as a thread start
   routine's result). */
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  /* void *stack_data;*/
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  // OMPT thread-begin: announce the worker to any attached tool and start
  // out in the "overhead" state, transitioning to "idle" while waiting.
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    // Re-read the team pointer after the fork barrier: the primary thread
    // may have assigned us to a new team while we were waiting.
    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}

/* ------------------------------------------------------------------------ */

void __kmp_internal_end_dest(void *specific_gtid) {
  // Make
  // sure no significant bits are lost when narrowing the stored value.
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

// Shared-library unload hook on UNIX: run the same shutdown path as the
// atexit handler when libomp is unloaded.
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  /* [Windows]
     josh: ideally, we want to completely shutdown the library in this atexit
     handler, but stat code that depends on thread specific data for gtid fails
     because that data becomes unavailable at some point during the shutdown, so
     we call __kmp_internal_end_thread instead. We should eventually remove the
     dependency on __kmp_get_specific_gtid in the stat code and use
     __kmp_internal_end_library to cleanly shutdown the library.

// TODO: Can some of this comment about GVS be removed?
     I suspect that the offending stat code is executed when the calling thread
     tries to clean up a dead root thread's data structures, resulting in GVS
     code trying to close the GVS structures for that thread, but since the stat
     code uses __kmp_get_specific_gtid to get the gtid with the assumption that
     the calling thread is cleaning up itself instead of another thread, it gets
     confused. This happens because allowing a thread to unregister and cleanup
     another thread is a recent modification for addressing an issue.
     Based on the current design (20050722), a thread may end up
     trying to unregister another thread only if thread death does not trigger
     the calling of __kmp_internal_end_thread. For Linux* OS, there is the
     thread specific data destructor function to detect thread death. For
     Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
     is nothing. Thus, the workaround is applicable only for Windows static
     stat library. */
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}

static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // NOTE(review): th_used_in_team is CAS'ed 0 -> 3 before waking the
        // thread; the meaning of state 3 is defined by the dist-barrier
        // implementation — confirm against the barrier code if changing.
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously.  If it was actively
    // spinning in the thread pool, decrement the global count.
    //
    // There is a small timing hole here - if the worker thread was just waking
    // up after sleeping in the pool, had reset it's th_active_in_pool flag but
    // not decremented the global counter __kmp_thread_pool_active_nth yet, then
    // the global counter might not get updated.
    //
    // Currently, this can only happen as the library is unloaded,
    // so there are no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  // Detach the thread from the global thread table before freeing it.
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();

} // __kmp_reap_thread

// Free every entry in the ITT region/barrier domain hash tables that were
// allocated from thread 'th', so the allocations do not outlive the thread.
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        // Save the link before freeing the node we are standing on.
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next =
            bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}

// Common core of library shutdown: unregister the library, mark the runtime
// done and, if no root is still active, reap the monitor, every pooled worker
// thread, every pooled team, and the task teams, then run final cleanup.
// Callers must already hold __kmp_initz_lock and __kmp_forkjoin_lock.
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies.  */
  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
  // dead roots
#endif

  // Scan for any root that is still active; 'i' doubles as the result flag
  // (i < capacity means an active root was found).
  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates.  */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
    KMP_MB(); /* Flush all pending memory write invalidates.  */

    // Need to check that monitor was initialized before reaping it. If we are
    // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
    // __kmp_monitor will appear to contain valid data, but it is only valid in
    // the parent process, not the child.
    // New behavior (201008): instead of keying off of the flag
    // __kmp_init_parallel, the monitor thread creation is keyed off
    // of the new flag __kmp_init_monitor.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
/* TODO move this to cleanup code */
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:
        // there can be uber threads alive here
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads.
    // This is valid for now, but be careful if threads are reaped sooner.
    while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    // See note above: One of the possible fixes for CQ138434 / CQ140126
    //
    // FIXME: push both code fragments down and CSE them?
    // push them into __kmp_cleanup() ?
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  } /* else !__kmp_global.t_active */
  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}

// Library-level shutdown entry point (atexit / destructor path). gtid_req is
// the caller's gtid if known, or negative to have it looked up from TLS.
// Deinitializes the hidden-helper team, unregisters the calling root (if the
// caller is an inactive uber thread), then performs the full termination
// under __kmp_initz_lock + __kmp_forkjoin_lock.
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // A parallel region is still running under this root: flag the abort
        // and bail out; the full teardown will happen elsewhere.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler, if they
 * call exit() */
/* For now, skip the usual subsequent processing and just dump the debug buffer.
   TODO: do a thorough shutdown instead */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // added unregister library call here when we switch to shm linux
      // if we don't, it will leave lots of files in /dev/shm
      // cleanup shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*.
  */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();

} // __kmp_internal_end_library

// Per-thread shutdown entry point (TLS destructor path). Like
// __kmp_internal_end_library, but only shuts the runtime down when the
// calling thread is the last remaining root; worker threads and still-active
// roots just detach and return.
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
   * only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: lets not shutdown the dynamic library at the exit of uber thread,
  // because we will better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */

  /* should we finish the run-time? are all siblings done?
  */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  // If any other uber (root) thread is still registered, the runtime must
  // stay alive for it: release the locks and leave without terminating.
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
} // __kmp_internal_end_thread

// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

// Build the per-process (and, where needed, per-user) name of the
// registration environment variable. Caller owns the returned string.
static inline char *__kmp_reg_status_name() {
  /* On RHEL 3u5 if linked statically, getpid() returns different values in
     each thread. If registration and unregistration go in different threads
     (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
     env var can not be found, because the name will contain different pid. */
// macOS* complains about name being too long with additional getuid()
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_get

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  // Union lets the wall-clock time be reused as raw bits for the random-ish
  // registration flag below.
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {

    char *value = NULL; // Actual value of the environment variable.

#if defined(KMP_USE_SHM)
    char *shm_name = __kmp_str_format("/%s", name);
    int shm_preexist = 0;
    char *data1;
    // O_CREAT|O_EXCL: creation fails with EEXIST if another copy of the
    // library already registered via this shared-memory name.
    int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
    if ((fd1 == -1) && (errno == EEXIST)) {
      // file didn't open because it already exists.
6717e8d8bef9SDimitry Andric // try opening existing file 6718e8d8bef9SDimitry Andric fd1 = shm_open(shm_name, O_RDWR, 0666); 6719e8d8bef9SDimitry Andric if (fd1 == -1) { // file didn't open 6720e8d8bef9SDimitry Andric // error out here 6721e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0), 6722e8d8bef9SDimitry Andric __kmp_msg_null); 6723e8d8bef9SDimitry Andric } else { 6724e8d8bef9SDimitry Andric // able to open existing file 6725e8d8bef9SDimitry Andric shm_preexist = 1; 6726e8d8bef9SDimitry Andric } 6727e8d8bef9SDimitry Andric } else if (fd1 == -1) { // SHM didn't open; it was due to error other than 6728e8d8bef9SDimitry Andric // already exists. 6729e8d8bef9SDimitry Andric // error out here. 6730e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno), 6731e8d8bef9SDimitry Andric __kmp_msg_null); 6732e8d8bef9SDimitry Andric } 6733e8d8bef9SDimitry Andric if (shm_preexist == 0) { 6734e8d8bef9SDimitry Andric // we created SHM now set size 6735e8d8bef9SDimitry Andric if (ftruncate(fd1, SHM_SIZE) == -1) { 6736e8d8bef9SDimitry Andric // error occured setting size; 6737e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"), 6738e8d8bef9SDimitry Andric KMP_ERR(errno), __kmp_msg_null); 6739e8d8bef9SDimitry Andric } 6740e8d8bef9SDimitry Andric } 6741e8d8bef9SDimitry Andric data1 = 6742e8d8bef9SDimitry Andric (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0); 6743e8d8bef9SDimitry Andric if (data1 == MAP_FAILED) { 6744e8d8bef9SDimitry Andric // failed to map shared memory 6745e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno), 6746e8d8bef9SDimitry Andric __kmp_msg_null); 6747e8d8bef9SDimitry Andric } 6748e8d8bef9SDimitry Andric if (shm_preexist == 0) { // set data to SHM, set value 6749e8d8bef9SDimitry Andric KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str); 6750e8d8bef9SDimitry Andric } 
6751e8d8bef9SDimitry Andric // Read value from either what we just wrote or existing file. 6752e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6753e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6754e8d8bef9SDimitry Andric close(fd1); 6755e8d8bef9SDimitry Andric #else // Windows and unix with static library 67560b57cec5SDimitry Andric // Set environment variable, but do not overwrite if it is exist. 67570b57cec5SDimitry Andric __kmp_env_set(name, __kmp_registration_str, 0); 6758e8d8bef9SDimitry Andric // read value to see if it got set 67590b57cec5SDimitry Andric value = __kmp_env_get(name); 6760e8d8bef9SDimitry Andric #endif 6761e8d8bef9SDimitry Andric 67620b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 67630b57cec5SDimitry Andric done = 1; // Ok, environment variable set successfully, exit the loop. 67640b57cec5SDimitry Andric } else { 67650b57cec5SDimitry Andric // Oops. Write failed. Another copy of OpenMP RTL is in memory. 67660b57cec5SDimitry Andric // Check whether it alive or dead. 67670b57cec5SDimitry Andric int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 
67680b57cec5SDimitry Andric char *tail = value; 67690b57cec5SDimitry Andric char *flag_addr_str = NULL; 67700b57cec5SDimitry Andric char *flag_val_str = NULL; 67710b57cec5SDimitry Andric char const *file_name = NULL; 67720b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_addr_str, &tail); 67730b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_val_str, &tail); 67740b57cec5SDimitry Andric file_name = tail; 67750b57cec5SDimitry Andric if (tail != NULL) { 6776fe6060f1SDimitry Andric unsigned long *flag_addr = 0; 6777fe6060f1SDimitry Andric unsigned long flag_val = 0; 67780b57cec5SDimitry Andric KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr)); 67790b57cec5SDimitry Andric KMP_SSCANF(flag_val_str, "%lx", &flag_val); 67800b57cec5SDimitry Andric if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 67810b57cec5SDimitry Andric // First, check whether environment-encoded address is mapped into 67820b57cec5SDimitry Andric // addr space. 67830b57cec5SDimitry Andric // If so, dereference it to see if it still has the right value. 67840b57cec5SDimitry Andric if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 67850b57cec5SDimitry Andric neighbor = 1; 67860b57cec5SDimitry Andric } else { 67870b57cec5SDimitry Andric // If not, then we know the other copy of the library is no longer 67880b57cec5SDimitry Andric // running. 67890b57cec5SDimitry Andric neighbor = 2; 67900b57cec5SDimitry Andric } 67910b57cec5SDimitry Andric } 67920b57cec5SDimitry Andric } 67930b57cec5SDimitry Andric switch (neighbor) { 67940b57cec5SDimitry Andric case 0: // Cannot parse environment variable -- neighbor status unknown. 67950b57cec5SDimitry Andric // Assume it is the incompatible format of future version of the 67960b57cec5SDimitry Andric // library. Assume the other library is alive. 67970b57cec5SDimitry Andric // WARN( ... ); // TODO: Issue a warning. 
67980b57cec5SDimitry Andric file_name = "unknown library"; 67990b57cec5SDimitry Andric KMP_FALLTHROUGH(); 68000b57cec5SDimitry Andric // Attention! Falling to the next case. That's intentional. 68010b57cec5SDimitry Andric case 1: { // Neighbor is alive. 68020b57cec5SDimitry Andric // Check it is allowed. 68030b57cec5SDimitry Andric char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 68040b57cec5SDimitry Andric if (!__kmp_str_match_true(duplicate_ok)) { 68050b57cec5SDimitry Andric // That's not allowed. Issue fatal error. 68060b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 68070b57cec5SDimitry Andric KMP_HNT(DuplicateLibrary), __kmp_msg_null); 68080b57cec5SDimitry Andric } 68090b57cec5SDimitry Andric KMP_INTERNAL_FREE(duplicate_ok); 68100b57cec5SDimitry Andric __kmp_duplicate_library_ok = 1; 68110b57cec5SDimitry Andric done = 1; // Exit the loop. 68120b57cec5SDimitry Andric } break; 68130b57cec5SDimitry Andric case 2: { // Neighbor is dead. 6814e8d8bef9SDimitry Andric 6815349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6816e8d8bef9SDimitry Andric // close shared memory. 6817e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6818e8d8bef9SDimitry Andric #else 68190b57cec5SDimitry Andric // Clear the variable and try to register library again. 
68200b57cec5SDimitry Andric __kmp_env_unset(name); 6821e8d8bef9SDimitry Andric #endif 68220b57cec5SDimitry Andric } break; 6823fe6060f1SDimitry Andric default: { 6824fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(0); 6825fe6060f1SDimitry Andric } break; 68260b57cec5SDimitry Andric } 68270b57cec5SDimitry Andric } 68280b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)value); 6829349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6830e8d8bef9SDimitry Andric KMP_INTERNAL_FREE((void *)shm_name); 6831e8d8bef9SDimitry Andric #endif 6832e8d8bef9SDimitry Andric } // while 68330b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)name); 68340b57cec5SDimitry Andric 68350b57cec5SDimitry Andric } // func __kmp_register_library_startup 68360b57cec5SDimitry Andric 68370b57cec5SDimitry Andric void __kmp_unregister_library(void) { 68380b57cec5SDimitry Andric 68390b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); 6840e8d8bef9SDimitry Andric char *value = NULL; 6841e8d8bef9SDimitry Andric 6842349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6843e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6844e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_RDONLY, 0666); 6845e8d8bef9SDimitry Andric if (fd1 == -1) { 6846e8d8bef9SDimitry Andric // file did not open. return. 
6847e8d8bef9SDimitry Andric return; 6848e8d8bef9SDimitry Andric } 6849e8d8bef9SDimitry Andric char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0); 6850e8d8bef9SDimitry Andric if (data1 != MAP_FAILED) { 6851e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6852e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6853e8d8bef9SDimitry Andric } 6854e8d8bef9SDimitry Andric close(fd1); 6855e8d8bef9SDimitry Andric #else 6856e8d8bef9SDimitry Andric value = __kmp_env_get(name); 6857e8d8bef9SDimitry Andric #endif 68580b57cec5SDimitry Andric 68590b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 68600b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 68610b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 68620b57cec5SDimitry Andric // Ok, this is our variable. Delete it. 6863349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6864e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6865e8d8bef9SDimitry Andric #else 68660b57cec5SDimitry Andric __kmp_env_unset(name); 6867e8d8bef9SDimitry Andric #endif 68680b57cec5SDimitry Andric } 68690b57cec5SDimitry Andric 6870349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6871e8d8bef9SDimitry Andric KMP_INTERNAL_FREE(shm_name); 6872e8d8bef9SDimitry Andric #endif 6873e8d8bef9SDimitry Andric 68740b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_registration_str); 68750b57cec5SDimitry Andric KMP_INTERNAL_FREE(value); 68760b57cec5SDimitry Andric KMP_INTERNAL_FREE(name); 68770b57cec5SDimitry Andric 68780b57cec5SDimitry Andric __kmp_registration_flag = 0; 68790b57cec5SDimitry Andric __kmp_registration_str = NULL; 68800b57cec5SDimitry Andric 68810b57cec5SDimitry Andric } // __kmp_unregister_library 68820b57cec5SDimitry Andric 68830b57cec5SDimitry Andric // End of Library registration stuff. 
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

// Classify the Intel MIC coprocessor generation from CPUID leaf 1 and store
// the result in __kmp_mic_type (mic2 == KNC per the tuning comments below;
// mic3 presumably the next generation -- TODO confirm exact model mapping).
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
// Probe CPUID leaf 7 for the WAITPKG feature bit (ECX bit 5) and derive the
// umwait/tpause enablement flags from it plus the user-level settings.
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
// (Weak fallback: a real libc getauxval, if present at link time, wins.)
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

// Decide whether user-level mwait may be used on Intel MIC (mic3 only),
// based on the auxiliary vector entry and/or the KMP_USER_LEVEL_MWAIT
// environment setting already parsed into __kmp_user_level_mwait.
static void __kmp_user_level_mwait_init() {
  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
  // KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */

// One-time serial initialization of the runtime: sanity-checks type sizes,
// initializes the allocator, registers the library, initializes all global
// and atomic locks, seeds default team/thread limits and barrier patterns,
// reads the environment, allocates the __kmp_threads/__kmp_root arrays, and
// registers the initial (uber master) root thread. Statement order matters
// throughout: locks before runtime init, runtime init before env init, env
// init before capacity/allocation, allocation before root registration.
// Callers must hold __kmp_initz_lock (see __kmp_serial_initialize below).
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  // The runtime's fixed-width types must have their advertised sizes.
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

#if OMPT_SUPPORT
  ompt_pre_init();
#endif
#if OMPD_SUPPORT
  __kmp_env_dump();
  ompd_init();
#endif

  __kmp_validate_locks();

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable or via mapped
     shared memory file and check to see whether another copy of the library is
     already registered. Since forked child process is often terminated, we
     postpone the registration till middle initialization in the child */
  if (__kmp_need_register_serial)
    __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

  /* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  // One atomic lock per operand size/kind used by the atomics support.
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */

  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

// Some global variable initialization moved here from kmp_env_initialize()
#ifdef KMP_DEBUG
  kmp_diag = 0;
#endif
  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth()
  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
  // part
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;
  // AC: do not use analytical here, because it is non-monotonous
  //__kmp_guided = kmp_sch_guided_iterative_chunked;
  //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
  // need to repeat assignment
  // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
  // bit control and barrier method control parts
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
      // lin_64 ): hyper,1
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  // Parse the environment; may override many of the defaults set above.
  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif
// Print all messages in message catalog for testing purposes.
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable sized records */
  /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
   * expandable */
  /* Since allocation is cache-aligned, just add extra padding at the end */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  // __kmp_threads and __kmp_root share one allocation; __kmp_root points
  // just past the __kmp_threads portion.
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
  __kmp_all_nth = 0;
  __kmp_nth = 0;

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  KMP_MB(); /* Flush all pending memory write invalidates. */

  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !KMP_DYNAMIC_LIB
  {
    /* Invoke the exit handler when the program finishes, only for static
       library. For dynamic library, we already have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. this way they
     can return false, not call our handler, avoid terminating the library, and
     continue execution where they left off. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

#if OMPT_SUPPORT
  ompt_post_init();
#endif

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}

// Public entry: perform serial initialization exactly once, using a
// double-checked pattern around __kmp_initz_lock.
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 72530b57cec5SDimitry Andric } 72540b57cec5SDimitry Andric 72550b57cec5SDimitry Andric static void __kmp_do_middle_initialize(void) { 72560b57cec5SDimitry Andric int i, j; 72570b57cec5SDimitry Andric int prev_dflt_team_nth; 72580b57cec5SDimitry Andric 72590b57cec5SDimitry Andric if (!__kmp_init_serial) { 72600b57cec5SDimitry Andric __kmp_do_serial_initialize(); 72610b57cec5SDimitry Andric } 72620b57cec5SDimitry Andric 72630b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 72640b57cec5SDimitry Andric 7265fcaf7f86SDimitry Andric if (UNLIKELY(!__kmp_need_register_serial)) { 7266fcaf7f86SDimitry Andric // We are in a forked child process. The registration was skipped during 7267fcaf7f86SDimitry Andric // serial initialization in __kmp_atfork_child handler. Do it here. 7268fcaf7f86SDimitry Andric __kmp_register_library_startup(); 7269fcaf7f86SDimitry Andric } 7270fcaf7f86SDimitry Andric 72710b57cec5SDimitry Andric // Save the previous value for the __kmp_dflt_team_nth so that 72720b57cec5SDimitry Andric // we can avoid some reinitialization if it hasn't changed. 72730b57cec5SDimitry Andric prev_dflt_team_nth = __kmp_dflt_team_nth; 72740b57cec5SDimitry Andric 72750b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 72760b57cec5SDimitry Andric // __kmp_affinity_initialize() will try to set __kmp_ncores to the 72770b57cec5SDimitry Andric // number of cores on the machine. 
__kmp_affinity_initialize();

#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  // Fall back to the raw OS processor count if affinity didn't set it.
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  // Clamp the default team size into [KMP_MIN_NTH, __kmp_sys_max_nth].
  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  // There's no harm in continuing if the following check fails,
  // but it indicates an error in the previous logic.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num threads icv for each
    // root thread that is currently registered with the RTL (which has not
    // already explicitly set its nthreads-var with a call to
    // omp_set_num_threads()).
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}

// Public, thread-safe entry point for middle initialization: double-checked
// test of __kmp_init_middle under __kmp_initz_lock.
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
__kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// Final (parallel) initialization phase, run lazily before the first real
// parallel region. Takes __kmp_initz_lock and holds it until the end of the
// function.
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* jc: The lock __kmp_initz_lock is already held, so calling
     __kmp_serial_initialize would cause a deadlock. So we call
     __kmp_do_serial_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  // Worker threads will set theirs to these values at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KMP_MB();
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// One-time initialization of the hidden helper team/threads. Requires full
// parallel initialization first.
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause dead lock.
__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Set the global variable indicating that we're initializing hidden helper
  // team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* ------------------------------------------------------------------------ */

// Per-thread setup executed right before a thread runs the implicit task of a
// parallel region: resets construct bookkeeping and the dispatch buffers, and
// pushes the consistency-check stack if enabled.
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
  // this_thr->th.th_info.ds.ds_tid ] );

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

// Teardown counterpart of __kmp_run_before_invoked_task: pops the
// consistency-check stack and finishes this thread's implicit task.
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

// Invoke the team's microtask (the outlined parallel-region body) on the
// calling thread, wrapped with ITT, OMPT, and stats instrumentation.
// Returns the result of __kmp_invoke_microtask.
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid,
(int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}

// Body run by each team's primary thread in a teams construct: registers a
// new contention-group (CG) root, then forks the league's "wrapped" microtask
// and joins it without a join barrier.
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // AC: last parameter "1" eliminates join barrier which won't work because
  // worker threads are in a fork barrier waiting for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

// Entry point invoked for the primary thread of each team in a teams
// construct; wraps __kmp_teams_master with OMPT bookkeeping. Always returns 1.
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

/* this sets the requested number of threads for the next parallel region
   encountered by this team. since this should be enclosed in the forkjoin
   critical section it should avoid race conditions with asymmetrical nested
   parallelism */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  // Non-positive requests are ignored: th_set_nproc is left unchanged.
  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

// Compute and store (in th_teams_size.nth) the per-team thread limit for a
// teams construct with num_teams teams. num_threads == 0 means no
// thread_limit clause was given, so a default is derived and capped;
// a positive num_threads is a user request and is clamped with a warning.
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size to exceed thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads
    // Store new thread limit; old limit is saved in th_cg_roots list
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires requested values to be positive,
    // but
people can send us any value, so we'd better check 77730eae32dcSDimitry Andric __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1), 77740eae32dcSDimitry Andric __kmp_msg_null); 77750eae32dcSDimitry Andric num_teams = 1; 77760eae32dcSDimitry Andric } 7777fe6060f1SDimitry Andric if (num_teams == 0) { 7778fe6060f1SDimitry Andric if (__kmp_nteams > 0) { 7779fe6060f1SDimitry Andric num_teams = __kmp_nteams; 7780fe6060f1SDimitry Andric } else { 77810b57cec5SDimitry Andric num_teams = 1; // default number of teams is 1. 7782fe6060f1SDimitry Andric } 7783fe6060f1SDimitry Andric } 77840b57cec5SDimitry Andric if (num_teams > __kmp_teams_max_nth) { // if too many teams requested? 77850b57cec5SDimitry Andric if (!__kmp_reserve_warn) { 77860b57cec5SDimitry Andric __kmp_reserve_warn = 1; 77870b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 77880b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), 77890b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 77900b57cec5SDimitry Andric } 77910b57cec5SDimitry Andric num_teams = __kmp_teams_max_nth; 77920b57cec5SDimitry Andric } 77930b57cec5SDimitry Andric // Set number of teams (number of threads in the outer "parallel" of the 77940b57cec5SDimitry Andric // teams) 77950b57cec5SDimitry Andric thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 77960b57cec5SDimitry Andric 7797fe6060f1SDimitry Andric __kmp_push_thread_limit(thr, num_teams, num_threads); 7798489b1cf2SDimitry Andric } 7799fe6060f1SDimitry Andric 7800fe6060f1SDimitry Andric /* This sets the requested number of teams for the teams region and/or 7801fe6060f1SDimitry Andric the number of threads for the next parallel region encountered */ 7802fe6060f1SDimitry Andric void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb, 7803fe6060f1SDimitry Andric int num_teams_ub, int num_threads) { 7804fe6060f1SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 7805fe6060f1SDimitry Andric 
KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0); 7806fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb); 7807fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(num_threads >= 0); 7808fe6060f1SDimitry Andric 7809fe6060f1SDimitry Andric if (num_teams_lb > num_teams_ub) { 7810fe6060f1SDimitry Andric __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub), 7811fe6060f1SDimitry Andric KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null); 78120b57cec5SDimitry Andric } 7813fe6060f1SDimitry Andric 7814fe6060f1SDimitry Andric int num_teams = 1; // defalt number of teams is 1. 7815fe6060f1SDimitry Andric 7816fe6060f1SDimitry Andric if (num_teams_lb == 0 && num_teams_ub > 0) 7817fe6060f1SDimitry Andric num_teams_lb = num_teams_ub; 7818fe6060f1SDimitry Andric 7819fe6060f1SDimitry Andric if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause 7820fe6060f1SDimitry Andric num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams; 7821fe6060f1SDimitry Andric if (num_teams > __kmp_teams_max_nth) { 7822fe6060f1SDimitry Andric if (!__kmp_reserve_warn) { 7823fe6060f1SDimitry Andric __kmp_reserve_warn = 1; 78240b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 7825fe6060f1SDimitry Andric KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), 78260b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 78270b57cec5SDimitry Andric } 7828fe6060f1SDimitry Andric num_teams = __kmp_teams_max_nth; 7829fe6060f1SDimitry Andric } 7830fe6060f1SDimitry Andric } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams 7831fe6060f1SDimitry Andric num_teams = num_teams_ub; 7832fe6060f1SDimitry Andric } else { // num_teams_lb <= num_teams <= num_teams_ub 78330eae32dcSDimitry Andric if (num_threads <= 0) { 7834fe6060f1SDimitry Andric if (num_teams_ub > __kmp_teams_max_nth) { 7835fe6060f1SDimitry Andric num_teams = num_teams_lb; 7836fe6060f1SDimitry Andric } else { 7837fe6060f1SDimitry Andric num_teams = num_teams_ub; 
7838fe6060f1SDimitry Andric } 7839fe6060f1SDimitry Andric } else { 7840fe6060f1SDimitry Andric num_teams = (num_threads > __kmp_teams_max_nth) 7841fe6060f1SDimitry Andric ? num_teams 7842fe6060f1SDimitry Andric : __kmp_teams_max_nth / num_threads; 7843fe6060f1SDimitry Andric if (num_teams < num_teams_lb) { 7844fe6060f1SDimitry Andric num_teams = num_teams_lb; 7845fe6060f1SDimitry Andric } else if (num_teams > num_teams_ub) { 7846fe6060f1SDimitry Andric num_teams = num_teams_ub; 78470b57cec5SDimitry Andric } 78480b57cec5SDimitry Andric } 7849fe6060f1SDimitry Andric } 7850fe6060f1SDimitry Andric // Set number of teams (number of threads in the outer "parallel" of the 7851fe6060f1SDimitry Andric // teams) 7852fe6060f1SDimitry Andric thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 7853fe6060f1SDimitry Andric 7854fe6060f1SDimitry Andric __kmp_push_thread_limit(thr, num_teams, num_threads); 78550b57cec5SDimitry Andric } 78560b57cec5SDimitry Andric 78570b57cec5SDimitry Andric // Set the proc_bind var to use in the following parallel region. 78580b57cec5SDimitry Andric void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { 78590b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 78600b57cec5SDimitry Andric thr->th.th_set_proc_bind = proc_bind; 78610b57cec5SDimitry Andric } 78620b57cec5SDimitry Andric 78630b57cec5SDimitry Andric /* Launch the worker threads into the microtask. 
*/ 78640b57cec5SDimitry Andric 78650b57cec5SDimitry Andric void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) { 78660b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 78670b57cec5SDimitry Andric 78680b57cec5SDimitry Andric #ifdef KMP_DEBUG 78690b57cec5SDimitry Andric int f; 78700b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 78710b57cec5SDimitry Andric 78720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 78730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 78740b57cec5SDimitry Andric KMP_ASSERT(KMP_MASTER_GTID(gtid)); 78750b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 78760b57cec5SDimitry Andric 78770b57cec5SDimitry Andric team->t.t_construct = 0; /* no single directives seen yet */ 78780b57cec5SDimitry Andric team->t.t_ordered.dt.t_value = 78790b57cec5SDimitry Andric 0; /* thread 0 enters the ordered section first */ 78800b57cec5SDimitry Andric 78810b57cec5SDimitry Andric /* Reset the identifiers on the dispatch buffer */ 78820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 78830b57cec5SDimitry Andric if (team->t.t_max_nproc > 1) { 78840b57cec5SDimitry Andric int i; 78850b57cec5SDimitry Andric for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { 78860b57cec5SDimitry Andric team->t.t_disp_buffer[i].buffer_index = i; 78870b57cec5SDimitry Andric team->t.t_disp_buffer[i].doacross_buf_idx = i; 78880b57cec5SDimitry Andric } 78890b57cec5SDimitry Andric } else { 78900b57cec5SDimitry Andric team->t.t_disp_buffer[0].buffer_index = 0; 78910b57cec5SDimitry Andric team->t.t_disp_buffer[0].doacross_buf_idx = 0; 78920b57cec5SDimitry Andric } 78930b57cec5SDimitry Andric 78940b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 78950b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_team == team); 78960b57cec5SDimitry Andric 78970b57cec5SDimitry Andric #ifdef KMP_DEBUG 78980b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 78990b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 79000b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc); 79010b57cec5SDimitry Andric } 79020b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 79030b57cec5SDimitry Andric 79040b57cec5SDimitry Andric /* release the worker threads so they may begin working */ 79050b57cec5SDimitry Andric __kmp_fork_barrier(gtid, 0); 79060b57cec5SDimitry Andric } 79070b57cec5SDimitry Andric 79080b57cec5SDimitry Andric void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { 79090b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 79100b57cec5SDimitry Andric 79110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 79120b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 79130b57cec5SDimitry Andric KMP_ASSERT(KMP_MASTER_GTID(gtid)); 79140b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 79150b57cec5SDimitry Andric 79160b57cec5SDimitry Andric /* Join barrier after fork */ 79170b57cec5SDimitry Andric 79180b57cec5SDimitry Andric #ifdef KMP_DEBUG 79190b57cec5SDimitry Andric if (__kmp_threads[gtid] && 79200b57cec5SDimitry Andric __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) { 79210b57cec5SDimitry Andric __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, 79220b57cec5SDimitry Andric __kmp_threads[gtid]); 79230b57cec5SDimitry Andric __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " 79240b57cec5SDimitry Andric "team->t.t_nproc=%d\n", 79250b57cec5SDimitry Andric gtid, __kmp_threads[gtid]->th.th_team_nproc, team, 79260b57cec5SDimitry Andric team->t.t_nproc); 79270b57cec5SDimitry Andric __kmp_print_structure(); 79280b57cec5SDimitry Andric } 79290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_threads[gtid] && 79300b57cec5SDimitry Andric __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc); 79310b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 79320b57cec5SDimitry Andric 79330b57cec5SDimitry Andric __kmp_join_barrier(gtid); /* wait for everyone */ 79340b57cec5SDimitry Andric #if OMPT_SUPPORT 79350b57cec5SDimitry Andric if (ompt_enabled.enabled && 79360b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { 79370b57cec5SDimitry Andric int ds_tid = this_thr->th.th_info.ds.ds_tid; 79380b57cec5SDimitry Andric ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr); 79390b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_overhead; 79400b57cec5SDimitry Andric #if OMPT_OPTIONAL 79410b57cec5SDimitry Andric void *codeptr = NULL; 79420b57cec5SDimitry Andric if (KMP_MASTER_TID(ds_tid) && 79430b57cec5SDimitry Andric (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || 79440b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region))) 79450b57cec5SDimitry Andric codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address; 
79460b57cec5SDimitry Andric 79470b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region_wait) { 79480b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( 79490b57cec5SDimitry Andric ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data, 79500b57cec5SDimitry Andric codeptr); 79510b57cec5SDimitry Andric } 79520b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_sync_region) { 79530b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_sync_region)( 79540b57cec5SDimitry Andric ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data, 79550b57cec5SDimitry Andric codeptr); 79560b57cec5SDimitry Andric } 79570b57cec5SDimitry Andric #endif 79580b57cec5SDimitry Andric if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { 79590b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 7960fe6060f1SDimitry Andric ompt_scope_end, NULL, task_data, 0, ds_tid, 7961fe6060f1SDimitry Andric ompt_task_implicit); // TODO: Can this be ompt_task_initial? 79620b57cec5SDimitry Andric } 79630b57cec5SDimitry Andric } 79640b57cec5SDimitry Andric #endif 79650b57cec5SDimitry Andric 79660b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 79670b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_team == team); 79680b57cec5SDimitry Andric } 79690b57cec5SDimitry Andric 79700b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 79710b57cec5SDimitry Andric 79720b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 79730b57cec5SDimitry Andric 79740b57cec5SDimitry Andric // Return the worker threads actively spinning in the hot team, if we 79750b57cec5SDimitry Andric // are at the outermost level of parallelism. Otherwise, return 0. 
79760b57cec5SDimitry Andric static int __kmp_active_hot_team_nproc(kmp_root_t *root) { 79770b57cec5SDimitry Andric int i; 79780b57cec5SDimitry Andric int retval; 79790b57cec5SDimitry Andric kmp_team_t *hot_team; 79800b57cec5SDimitry Andric 79810b57cec5SDimitry Andric if (root->r.r_active) { 79820b57cec5SDimitry Andric return 0; 79830b57cec5SDimitry Andric } 79840b57cec5SDimitry Andric hot_team = root->r.r_hot_team; 79850b57cec5SDimitry Andric if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { 7986fe6060f1SDimitry Andric return hot_team->t.t_nproc - 1; // Don't count primary thread 79870b57cec5SDimitry Andric } 79880b57cec5SDimitry Andric 7989fe6060f1SDimitry Andric // Skip the primary thread - it is accounted for elsewhere. 79900b57cec5SDimitry Andric retval = 0; 79910b57cec5SDimitry Andric for (i = 1; i < hot_team->t.t_nproc; i++) { 79920b57cec5SDimitry Andric if (hot_team->t.t_threads[i]->th.th_active) { 79930b57cec5SDimitry Andric retval++; 79940b57cec5SDimitry Andric } 79950b57cec5SDimitry Andric } 79960b57cec5SDimitry Andric return retval; 79970b57cec5SDimitry Andric } 79980b57cec5SDimitry Andric 79990b57cec5SDimitry Andric // Perform an automatic adjustment to the number of 80000b57cec5SDimitry Andric // threads used by the next parallel region. 
80010b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) { 80020b57cec5SDimitry Andric int retval; 80030b57cec5SDimitry Andric int pool_active; 80040b57cec5SDimitry Andric int hot_team_active; 80050b57cec5SDimitry Andric int team_curr_active; 80060b57cec5SDimitry Andric int system_active; 80070b57cec5SDimitry Andric 80080b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, 80090b57cec5SDimitry Andric set_nproc)); 80100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root); 80110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0] 80120b57cec5SDimitry Andric ->th.th_current_task->td_icvs.dynamic == TRUE); 80130b57cec5SDimitry Andric KMP_DEBUG_ASSERT(set_nproc > 1); 80140b57cec5SDimitry Andric 80150b57cec5SDimitry Andric if (set_nproc == 1) { 80160b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n")); 80170b57cec5SDimitry Andric return 1; 80180b57cec5SDimitry Andric } 80190b57cec5SDimitry Andric 80200b57cec5SDimitry Andric // Threads that are active in the thread pool, active in the hot team for this 80210b57cec5SDimitry Andric // particular root (if we are at the outer par level), and the currently 8022fe6060f1SDimitry Andric // executing thread (to become the primary thread) are available to add to the 8023fe6060f1SDimitry Andric // new team, but are currently contributing to the system load, and must be 80240b57cec5SDimitry Andric // accounted for. 80250b57cec5SDimitry Andric pool_active = __kmp_thread_pool_active_nth; 80260b57cec5SDimitry Andric hot_team_active = __kmp_active_hot_team_nproc(root); 80270b57cec5SDimitry Andric team_curr_active = pool_active + hot_team_active + 1; 80280b57cec5SDimitry Andric 80290b57cec5SDimitry Andric // Check the system load. 
80300b57cec5SDimitry Andric system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active); 80310b57cec5SDimitry Andric KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d " 80320b57cec5SDimitry Andric "hot team active = %d\n", 80330b57cec5SDimitry Andric system_active, pool_active, hot_team_active)); 80340b57cec5SDimitry Andric 80350b57cec5SDimitry Andric if (system_active < 0) { 80360b57cec5SDimitry Andric // There was an error reading the necessary info from /proc, so use the 80370b57cec5SDimitry Andric // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode 80380b57cec5SDimitry Andric // = dynamic_thread_limit, we shouldn't wind up getting back here. 80390b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 80400b57cec5SDimitry Andric KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit"); 80410b57cec5SDimitry Andric 80420b57cec5SDimitry Andric // Make this call behave like the thread limit algorithm. 80430b57cec5SDimitry Andric retval = __kmp_avail_proc - __kmp_nth + 80440b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 80450b57cec5SDimitry Andric if (retval > set_nproc) { 80460b57cec5SDimitry Andric retval = set_nproc; 80470b57cec5SDimitry Andric } 80480b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 80490b57cec5SDimitry Andric retval = KMP_MIN_NTH; 80500b57cec5SDimitry Andric } 80510b57cec5SDimitry Andric 80520b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", 80530b57cec5SDimitry Andric retval)); 80540b57cec5SDimitry Andric return retval; 80550b57cec5SDimitry Andric } 80560b57cec5SDimitry Andric 80570b57cec5SDimitry Andric // There is a slight delay in the load balance algorithm in detecting new 80580b57cec5SDimitry Andric // running procs. 
The real system load at this instant should be at least as 80590b57cec5SDimitry Andric // large as the #active omp thread that are available to add to the team. 80600b57cec5SDimitry Andric if (system_active < team_curr_active) { 80610b57cec5SDimitry Andric system_active = team_curr_active; 80620b57cec5SDimitry Andric } 80630b57cec5SDimitry Andric retval = __kmp_avail_proc - system_active + team_curr_active; 80640b57cec5SDimitry Andric if (retval > set_nproc) { 80650b57cec5SDimitry Andric retval = set_nproc; 80660b57cec5SDimitry Andric } 80670b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 80680b57cec5SDimitry Andric retval = KMP_MIN_NTH; 80690b57cec5SDimitry Andric } 80700b57cec5SDimitry Andric 80710b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval)); 80720b57cec5SDimitry Andric return retval; 80730b57cec5SDimitry Andric } // __kmp_load_balance_nproc() 80740b57cec5SDimitry Andric 80750b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 80760b57cec5SDimitry Andric 80770b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 80780b57cec5SDimitry Andric 80790b57cec5SDimitry Andric /* NOTE: this is called with the __kmp_init_lock held */ 80800b57cec5SDimitry Andric void __kmp_cleanup(void) { 80810b57cec5SDimitry Andric int f; 80820b57cec5SDimitry Andric 80830b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: enter\n")); 80840b57cec5SDimitry Andric 80850b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) { 80860b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 80870b57cec5SDimitry Andric __kmp_remove_signals(); 80880b57cec5SDimitry Andric #endif 80890b57cec5SDimitry Andric TCW_4(__kmp_init_parallel, FALSE); 80900b57cec5SDimitry Andric } 80910b57cec5SDimitry Andric 80920b57cec5SDimitry Andric if (TCR_4(__kmp_init_middle)) { 80930b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 80940b57cec5SDimitry Andric __kmp_affinity_uninitialize(); 80950b57cec5SDimitry Andric #endif /* 
KMP_AFFINITY_SUPPORTED */ 80960b57cec5SDimitry Andric __kmp_cleanup_hierarchy(); 80970b57cec5SDimitry Andric TCW_4(__kmp_init_middle, FALSE); 80980b57cec5SDimitry Andric } 80990b57cec5SDimitry Andric 81000b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); 81010b57cec5SDimitry Andric 81020b57cec5SDimitry Andric if (__kmp_init_serial) { 81030b57cec5SDimitry Andric __kmp_runtime_destroy(); 81040b57cec5SDimitry Andric __kmp_init_serial = FALSE; 81050b57cec5SDimitry Andric } 81060b57cec5SDimitry Andric 81070b57cec5SDimitry Andric __kmp_cleanup_threadprivate_caches(); 81080b57cec5SDimitry Andric 81090b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 81100b57cec5SDimitry Andric if (__kmp_root[f] != NULL) { 81110b57cec5SDimitry Andric __kmp_free(__kmp_root[f]); 81120b57cec5SDimitry Andric __kmp_root[f] = NULL; 81130b57cec5SDimitry Andric } 81140b57cec5SDimitry Andric } 81150b57cec5SDimitry Andric __kmp_free(__kmp_threads); 81160b57cec5SDimitry Andric // __kmp_threads and __kmp_root were allocated at once, as single block, so 81170b57cec5SDimitry Andric // there is no need in freeing __kmp_root. 81180b57cec5SDimitry Andric __kmp_threads = NULL; 81190b57cec5SDimitry Andric __kmp_root = NULL; 81200b57cec5SDimitry Andric __kmp_threads_capacity = 0; 81210b57cec5SDimitry Andric 812281ad6265SDimitry Andric // Free old __kmp_threads arrays if they exist. 
812381ad6265SDimitry Andric kmp_old_threads_list_t *ptr = __kmp_old_threads_list; 812481ad6265SDimitry Andric while (ptr) { 812581ad6265SDimitry Andric kmp_old_threads_list_t *next = ptr->next; 812681ad6265SDimitry Andric __kmp_free(ptr->threads); 812781ad6265SDimitry Andric __kmp_free(ptr); 812881ad6265SDimitry Andric ptr = next; 812981ad6265SDimitry Andric } 813081ad6265SDimitry Andric 81310b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 81320b57cec5SDimitry Andric __kmp_cleanup_indirect_user_locks(); 81330b57cec5SDimitry Andric #else 81340b57cec5SDimitry Andric __kmp_cleanup_user_locks(); 81350b57cec5SDimitry Andric #endif 8136fe6060f1SDimitry Andric #if OMPD_SUPPORT 8137fe6060f1SDimitry Andric if (ompd_state) { 8138fe6060f1SDimitry Andric __kmp_free(ompd_env_block); 8139fe6060f1SDimitry Andric ompd_env_block = NULL; 8140fe6060f1SDimitry Andric ompd_env_block_size = 0; 8141fe6060f1SDimitry Andric } 8142fe6060f1SDimitry Andric #endif 81430b57cec5SDimitry Andric 81440b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 81450b57cec5SDimitry Andric KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file)); 81460b57cec5SDimitry Andric __kmp_cpuinfo_file = NULL; 81470b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 81480b57cec5SDimitry Andric 81490b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 81500b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 81510b57cec5SDimitry Andric __kmp_print_speculative_stats(); 81520b57cec5SDimitry Andric #endif 81530b57cec5SDimitry Andric #endif 81540b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_nth.nth); 81550b57cec5SDimitry Andric __kmp_nested_nth.nth = NULL; 81560b57cec5SDimitry Andric __kmp_nested_nth.size = 0; 81570b57cec5SDimitry Andric __kmp_nested_nth.used = 0; 81580b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); 81590b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types = NULL; 81600b57cec5SDimitry Andric __kmp_nested_proc_bind.size = 0; 81610b57cec5SDimitry Andric 
__kmp_nested_proc_bind.used = 0; 81620b57cec5SDimitry Andric if (__kmp_affinity_format) { 81630b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_affinity_format); 81640b57cec5SDimitry Andric __kmp_affinity_format = NULL; 81650b57cec5SDimitry Andric } 81660b57cec5SDimitry Andric 81670b57cec5SDimitry Andric __kmp_i18n_catclose(); 81680b57cec5SDimitry Andric 81690b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 81700b57cec5SDimitry Andric __kmp_hier_scheds.deallocate(); 81710b57cec5SDimitry Andric #endif 81720b57cec5SDimitry Andric 81730b57cec5SDimitry Andric #if KMP_STATS_ENABLED 81740b57cec5SDimitry Andric __kmp_stats_fini(); 81750b57cec5SDimitry Andric #endif 81760b57cec5SDimitry Andric 81770b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: exit\n")); 81780b57cec5SDimitry Andric } 81790b57cec5SDimitry Andric 81800b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 81810b57cec5SDimitry Andric 81820b57cec5SDimitry Andric int __kmp_ignore_mppbeg(void) { 81830b57cec5SDimitry Andric char *env; 81840b57cec5SDimitry Andric 81850b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 81860b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 81870b57cec5SDimitry Andric return FALSE; 81880b57cec5SDimitry Andric } 81890b57cec5SDimitry Andric // By default __kmpc_begin() is no-op. 81900b57cec5SDimitry Andric return TRUE; 81910b57cec5SDimitry Andric } 81920b57cec5SDimitry Andric 81930b57cec5SDimitry Andric int __kmp_ignore_mppend(void) { 81940b57cec5SDimitry Andric char *env; 81950b57cec5SDimitry Andric 81960b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 81970b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 81980b57cec5SDimitry Andric return FALSE; 81990b57cec5SDimitry Andric } 82000b57cec5SDimitry Andric // By default __kmpc_end() is no-op. 
82010b57cec5SDimitry Andric return TRUE; 82020b57cec5SDimitry Andric } 82030b57cec5SDimitry Andric 82040b57cec5SDimitry Andric void __kmp_internal_begin(void) { 82050b57cec5SDimitry Andric int gtid; 82060b57cec5SDimitry Andric kmp_root_t *root; 82070b57cec5SDimitry Andric 82080b57cec5SDimitry Andric /* this is a very important step as it will register new sibling threads 82090b57cec5SDimitry Andric and assign these new uber threads a new gtid */ 82100b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 82110b57cec5SDimitry Andric root = __kmp_threads[gtid]->th.th_root; 82120b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 82130b57cec5SDimitry Andric 82140b57cec5SDimitry Andric if (root->r.r_begin) 82150b57cec5SDimitry Andric return; 82160b57cec5SDimitry Andric __kmp_acquire_lock(&root->r.r_begin_lock, gtid); 82170b57cec5SDimitry Andric if (root->r.r_begin) { 82180b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 82190b57cec5SDimitry Andric return; 82200b57cec5SDimitry Andric } 82210b57cec5SDimitry Andric 82220b57cec5SDimitry Andric root->r.r_begin = TRUE; 82230b57cec5SDimitry Andric 82240b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 82250b57cec5SDimitry Andric } 82260b57cec5SDimitry Andric 82270b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 82280b57cec5SDimitry Andric 82290b57cec5SDimitry Andric void __kmp_user_set_library(enum library_type arg) { 82300b57cec5SDimitry Andric int gtid; 82310b57cec5SDimitry Andric kmp_root_t *root; 82320b57cec5SDimitry Andric kmp_info_t *thread; 82330b57cec5SDimitry Andric 82340b57cec5SDimitry Andric /* first, make sure we are initialized so we can get our gtid */ 82350b57cec5SDimitry Andric 82360b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 82370b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 82380b57cec5SDimitry Andric 82390b57cec5SDimitry Andric root = thread->th.th_root; 82400b57cec5SDimitry Andric 
82410b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, 82420b57cec5SDimitry Andric library_serial)); 82430b57cec5SDimitry Andric if (root->r.r_in_parallel) { /* Must be called in serial section of top-level 82440b57cec5SDimitry Andric thread */ 82450b57cec5SDimitry Andric KMP_WARNING(SetLibraryIncorrectCall); 82460b57cec5SDimitry Andric return; 82470b57cec5SDimitry Andric } 82480b57cec5SDimitry Andric 82490b57cec5SDimitry Andric switch (arg) { 82500b57cec5SDimitry Andric case library_serial: 82510b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82520b57cec5SDimitry Andric set__nproc(thread, 1); 82530b57cec5SDimitry Andric break; 82540b57cec5SDimitry Andric case library_turnaround: 82550b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82560b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 82570b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 82580b57cec5SDimitry Andric break; 82590b57cec5SDimitry Andric case library_throughput: 82600b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 82610b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? 
__kmp_dflt_team_nth 82620b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 82630b57cec5SDimitry Andric break; 82640b57cec5SDimitry Andric default: 82650b57cec5SDimitry Andric KMP_FATAL(UnknownLibraryType, arg); 82660b57cec5SDimitry Andric } 82670b57cec5SDimitry Andric 82680b57cec5SDimitry Andric __kmp_aux_set_library(arg); 82690b57cec5SDimitry Andric } 82700b57cec5SDimitry Andric 82710b57cec5SDimitry Andric void __kmp_aux_set_stacksize(size_t arg) { 82720b57cec5SDimitry Andric if (!__kmp_init_serial) 82730b57cec5SDimitry Andric __kmp_serial_initialize(); 82740b57cec5SDimitry Andric 82750b57cec5SDimitry Andric #if KMP_OS_DARWIN 82760b57cec5SDimitry Andric if (arg & (0x1000 - 1)) { 82770b57cec5SDimitry Andric arg &= ~(0x1000 - 1); 82780b57cec5SDimitry Andric if (arg + 0x1000) /* check for overflow if we round up */ 82790b57cec5SDimitry Andric arg += 0x1000; 82800b57cec5SDimitry Andric } 82810b57cec5SDimitry Andric #endif 82820b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 82830b57cec5SDimitry Andric 82840b57cec5SDimitry Andric /* only change the default stacksize before the first parallel region */ 82850b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) { 82860b57cec5SDimitry Andric size_t value = arg; /* argument is in bytes */ 82870b57cec5SDimitry Andric 82880b57cec5SDimitry Andric if (value < __kmp_sys_min_stksize) 82890b57cec5SDimitry Andric value = __kmp_sys_min_stksize; 82900b57cec5SDimitry Andric else if (value > KMP_MAX_STKSIZE) 82910b57cec5SDimitry Andric value = KMP_MAX_STKSIZE; 82920b57cec5SDimitry Andric 82930b57cec5SDimitry Andric __kmp_stksize = value; 82940b57cec5SDimitry Andric 82950b57cec5SDimitry Andric __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? 
  */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
// Record the requested library mode and adjust the yield/blocktime defaults
// that depend on it (unless the user explicitly set them).
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    // Only override the yield behavior if the user did not set it explicitly
    // (__kmp_use_yield_exp_set tracks an explicit setting).
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct.
// On return, teams_serialized holds the remaining serialization count of the
// team found (0 when the teams construct is an actual parallel team).
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    // Walk up the team hierarchy until we reach the team at the level just
    // inside the teams construct, accounting for serialized (collapsed)
    // nesting levels along the way.
    while (ii > level) {
      // Consume as many serialized levels of the current team as needed.
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      // If all serialized levels of this team were consumed, climb to the
      // parent team before checking again.
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      // Non-serialized level: moving to the parent drops one level.
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

// omp_get_team_num() support: returns the caller's team number, or 0 when
// outside a teams construct or when the teams region is serialized.
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

// omp_get_num_teams() support: returns the number of teams, or 1 when outside
// a teams construct or when the teams region is serialized.
int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * . => right justified when size is specified
 * by default output is left justified
 * size is the *minimum* field length
 * All other characters are printed as is
 *
 * Available field types:
 * L {thread_level}      - omp_get_level()
 * n {thread_num}        - omp_get_thread_num()
 * h {host}              - name of host machine
 * P {process_id}        - process id (integer)
 * T {thread_identifier} - native thread identifier (integer)
 * N {num_threads}       - omp_get_num_threads()
 * A {ancestor_tnum}     - omp_get_ancestor_thread_num(omp_get_level()-1)
 * a {thread_affinity}   - comma separated list of integers or integer ranges
 *                         (values of affinity mask)
 *
 * Implementation-specific field types can be added
 * If a type is unknown, print "undefined"
 */

// Structure holding the short name, long name, and corresponding data type
// for snprintf. A table of these will represent the entire valid keyword
// field types.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> thread level
  const char *long_name; // from spec thread_level -> thread level
  char field_format; // data type for snprintf (typically 'd' or 's'
  // for integer or string)
} kmp_affinity_format_field_t;

// Table of all valid affinity-format field types. NOTE(review): the short
// names here ('L' -> nesting_level, 'H' -> host, 'a' -> ancestor_tnum) differ
// in case from the overview comment above; the table is authoritative for the
// parser below.
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};

// Return the number of characters it takes to hold field.
// Parses one %-field starting at *ptr, advances *ptr past it, and prints the
// formatted value into field_buffer (cleared first). gtid/th identify the
// calling thread for thread-specific fields.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0}; // snprintf-style format built incrementally
  char absolute_short_name = 0; // canonical short name; 0 => unknown field

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-'; // left-justify is the default
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths.
    // This also prevents overflowing format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short)
  // Canonicalize the name into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      // Complete the snprintf format with the field's conversion character.
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      // Malformed long name (no closing brace): treat as unknown field.
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested
  // value using snprintf within __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to spec, If an implementation does not have info for field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

/*
 * Return number of characters needed to hold the affinity string
 * (not including null byte character)
 * The resultant string is printed to buffer, which the caller can then
 * handle afterwards
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field; // scratch buffer reused for each %-field

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, then we use
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}

/* ------------------------------------------------------------------------ */

// Set the blocktime (spin-before-sleep interval) for thread tid's current
// team and its serial team, clamping arg to the supported range.
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

// Apply a defaults string (e.g. from kmp_set_defaults) to the environment.
// NOTE(review): the len parameter is not used in this body — presumably str
// is NUL-terminated by all callers; confirm before relying on len.
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in current
  // PAROPT )
  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
  // can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to OpenMP RTL to make a decision on which method to select
  // among generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

// Local helper macros, #undef'd at the end of this function.
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

// Per-architecture, per-OS tuning of the default method selection.
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      // Small teams prefer atomics; larger teams prefer the tree reduction.
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        // Forced method unavailable for this construct: fall back to critical.
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// this function is for testing set/get/determine reduce method
88670b57cec5SDimitry Andric kmp_int32 __kmp_get_reduce_method(void) { 88680b57cec5SDimitry Andric return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 88690b57cec5SDimitry Andric } 88700b57cec5SDimitry Andric 88710b57cec5SDimitry Andric // Soft pause sets up threads to ignore blocktime and just go to sleep. 88720b57cec5SDimitry Andric // Spin-wait code checks __kmp_pause_status and reacts accordingly. 88730b57cec5SDimitry Andric void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } 88740b57cec5SDimitry Andric 88750b57cec5SDimitry Andric // Hard pause shuts down the runtime completely. Resume happens naturally when 88760b57cec5SDimitry Andric // OpenMP is used subsequently. 88770b57cec5SDimitry Andric void __kmp_hard_pause() { 88780b57cec5SDimitry Andric __kmp_pause_status = kmp_hard_paused; 88790b57cec5SDimitry Andric __kmp_internal_end_thread(-1); 88800b57cec5SDimitry Andric } 88810b57cec5SDimitry Andric 88820b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    // gtid 0 (the initial thread) is the caller's side; wake workers only.
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          // Race with the thread's own suspend path: keep retrying until we
          // either observe it asleep (and wake it) or win its suspend lock
          // (proving it will not go to sleep).
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// TODO: add warning messages
// Change the runtime pause state to `level` (kmp_not_paused to resume,
// kmp_soft_paused / kmp_hard_paused to pause).  Returns 0 on success; returns
// 1 when resuming while not paused, pausing while already paused, or when the
// level is not a recognized value.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // error message about runtime not being paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // error message about invalid level
    return 1;
  }
}

// Display the runtime environment (OMP_DISPLAY_ENV-style output).  Takes the
// bootstrap initialization lock and performs serial initialization first if
// the library has not been initialized yet.
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so distributed barrier must be modified
// th_used_in_team state encoding (as used below and in
// __kmp_add_threads_to_team): 0 = not in team, 1 = in team, 2 = transitioning
// out of the team, 3 = transitioning into the team.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If thread is transitioning still to in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Full fence so the state stores above are visible before we start polling
  // for the workers' acknowledgment.
  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0; but may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

// Re-attach worker threads [1, new_nthreads) to the team after a distributed
// barrier resize, and wait until every one of them has checked back in.
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the thread that it should transition itself back into the team. Then, if
  // blocktime isn't infinite, the thread could be sleeping, so we send a resume
  // to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces master to wait until
  // all threads have moved into the team and are waiting in the barrier.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
// Hidden helper threads are enabled by default on Linux only (8 helpers).
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
// Count of hidden helper threads that have entered the wrapper below.
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

// Outlined body executed by every thread of the hidden helper team.
void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // that when a regular thread pushes a hidden helper task to one hidden
  // helper thread, the thread has not been awaken once since they're released
  // by the main thread after creating the team.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

// Entry point for the thread that hosts the hidden helper team: registers a
// new root, forks the helper team, then signals deinitialization completion.
void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  // NOTE(review): this blocks in the wrapper until the main thread is told to
  // shut the helper team down — confirm against __kmp_hidden_helper_main_thread_wait.
  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}

/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   Note: we skip duplicate topology levels, and skip levels with only
      one entity.
   KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
      in the topology, and initializes the number of threads at each of those
      levels to the number of entities at each level, respectively, below the
      entity at the parent level.
   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
      but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
      the user to turn nesting on explicitly. This is an even more experimental
      option to this experimental feature, and may change or go away in the
      future.
9113fe6060f1SDimitry Andric */ 9114fe6060f1SDimitry Andric 9115fe6060f1SDimitry Andric // Allocate space to store nesting levels 9116fe6060f1SDimitry Andric void __kmp_init_nesting_mode() { 9117fe6060f1SDimitry Andric int levels = KMP_HW_LAST; 9118fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = levels; 9119fe6060f1SDimitry Andric __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int)); 9120fe6060f1SDimitry Andric for (int i = 0; i < levels; ++i) 9121fe6060f1SDimitry Andric __kmp_nesting_nth_level[i] = 0; 9122fe6060f1SDimitry Andric if (__kmp_nested_nth.size < levels) { 9123fe6060f1SDimitry Andric __kmp_nested_nth.nth = 9124fe6060f1SDimitry Andric (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int)); 9125fe6060f1SDimitry Andric __kmp_nested_nth.size = levels; 9126fe6060f1SDimitry Andric } 9127fe6060f1SDimitry Andric } 9128fe6060f1SDimitry Andric 9129fe6060f1SDimitry Andric // Set # threads for top levels of nesting; must be called after topology set 9130fe6060f1SDimitry Andric void __kmp_set_nesting_mode_threads() { 9131fe6060f1SDimitry Andric kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()]; 9132fe6060f1SDimitry Andric 9133fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) 9134fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 9135fe6060f1SDimitry Andric else if (__kmp_nesting_mode > 1) 9136fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9137fe6060f1SDimitry Andric 9138fe6060f1SDimitry Andric if (__kmp_topology) { // use topology info 9139fe6060f1SDimitry Andric int loc, hw_level; 9140fe6060f1SDimitry Andric for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() && 9141fe6060f1SDimitry Andric loc < __kmp_nesting_mode_nlevels; 9142fe6060f1SDimitry Andric loc++, hw_level++) { 9143fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level); 9144fe6060f1SDimitry Andric if (__kmp_nesting_nth_level[loc] == 1) 
9145fe6060f1SDimitry Andric loc--; 9146fe6060f1SDimitry Andric } 9147fe6060f1SDimitry Andric // Make sure all cores are used 9148fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && loc > 1) { 9149fe6060f1SDimitry Andric int core_level = __kmp_topology->get_level(KMP_HW_CORE); 9150fe6060f1SDimitry Andric int num_cores = __kmp_topology->get_count(core_level); 9151fe6060f1SDimitry Andric int upper_levels = 1; 9152fe6060f1SDimitry Andric for (int level = 0; level < loc - 1; ++level) 9153fe6060f1SDimitry Andric upper_levels *= __kmp_nesting_nth_level[level]; 9154fe6060f1SDimitry Andric if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores) 9155fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc - 1] = 9156fe6060f1SDimitry Andric num_cores / __kmp_nesting_nth_level[loc - 2]; 9157fe6060f1SDimitry Andric } 9158fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = loc; 9159fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9160fe6060f1SDimitry Andric } else { // no topology info available; provide a reasonable guesstimation 9161fe6060f1SDimitry Andric if (__kmp_avail_proc >= 4) { 9162fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2; 9163fe6060f1SDimitry Andric __kmp_nesting_nth_level[1] = 2; 9164fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 2; 9165fe6060f1SDimitry Andric } else { 9166fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc; 9167fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 1; 9168fe6060f1SDimitry Andric } 9169fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9170fe6060f1SDimitry Andric } 9171fe6060f1SDimitry Andric for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) { 9172fe6060f1SDimitry Andric __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i]; 9173fe6060f1SDimitry Andric } 9174fe6060f1SDimitry Andric set__nproc(thread, __kmp_nesting_nth_level[0]); 9175fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > 
__kmp_nesting_mode) 9176fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9177fe6060f1SDimitry Andric if (get__max_active_levels(thread) > 1) { 9178fe6060f1SDimitry Andric // if max levels was set, set nesting mode levels to same 9179fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = get__max_active_levels(thread); 9180fe6060f1SDimitry Andric } 9181fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) // turn on nesting for this case only 9182fe6060f1SDimitry Andric set__max_active_levels(thread, __kmp_nesting_mode_nlevels); 9183fe6060f1SDimitry Andric } 9184