10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_runtime.cpp -- KPTS runtime support library 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "kmp.h" 140b57cec5SDimitry Andric #include "kmp_affinity.h" 150b57cec5SDimitry Andric #include "kmp_atomic.h" 160b57cec5SDimitry Andric #include "kmp_environment.h" 170b57cec5SDimitry Andric #include "kmp_error.h" 180b57cec5SDimitry Andric #include "kmp_i18n.h" 190b57cec5SDimitry Andric #include "kmp_io.h" 200b57cec5SDimitry Andric #include "kmp_itt.h" 210b57cec5SDimitry Andric #include "kmp_settings.h" 220b57cec5SDimitry Andric #include "kmp_stats.h" 230b57cec5SDimitry Andric #include "kmp_str.h" 240b57cec5SDimitry Andric #include "kmp_wait_release.h" 250b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h" 260b57cec5SDimitry Andric #include "kmp_dispatch.h" 270b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 280b57cec5SDimitry Andric #include "kmp_dispatch_hier.h" 290b57cec5SDimitry Andric #endif 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #if OMPT_SUPPORT 320b57cec5SDimitry Andric #include "ompt-specific.h" 330b57cec5SDimitry Andric #endif 34fe6060f1SDimitry Andric #if OMPD_SUPPORT 35fe6060f1SDimitry Andric #include "ompd-specific.h" 36fe6060f1SDimitry Andric #endif 370b57cec5SDimitry Andric 38d409305fSDimitry Andric #if OMP_PROFILING_SUPPORT 39e8d8bef9SDimitry Andric #include "llvm/Support/TimeProfiler.h" 
40e8d8bef9SDimitry Andric static char *ProfileTraceFile = nullptr; 41e8d8bef9SDimitry Andric #endif 42e8d8bef9SDimitry Andric 430b57cec5SDimitry Andric /* these are temporary issues to be dealt with */ 440b57cec5SDimitry Andric #define KMP_USE_PRCTL 0 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #if KMP_OS_WINDOWS 470b57cec5SDimitry Andric #include <process.h> 480b57cec5SDimitry Andric #endif 490b57cec5SDimitry Andric 50e8d8bef9SDimitry Andric #if KMP_OS_WINDOWS 51e8d8bef9SDimitry Andric // windows does not need include files as it doesn't use shared memory 52e8d8bef9SDimitry Andric #else 53e8d8bef9SDimitry Andric #include <sys/mman.h> 54e8d8bef9SDimitry Andric #include <sys/stat.h> 55e8d8bef9SDimitry Andric #include <fcntl.h> 56e8d8bef9SDimitry Andric #define SHM_SIZE 1024 57e8d8bef9SDimitry Andric #endif 58e8d8bef9SDimitry Andric 590b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 600b57cec5SDimitry Andric char const __kmp_version_alt_comp[] = 610b57cec5SDimitry Andric KMP_VERSION_PREFIX "alternative compiler support: yes"; 620b57cec5SDimitry Andric #endif /* defined(KMP_GOMP_COMPAT) */ 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric char const __kmp_version_omp_api[] = 650b57cec5SDimitry Andric KMP_VERSION_PREFIX "API version: 5.0 (201611)"; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric #ifdef KMP_DEBUG 680b57cec5SDimitry Andric char const __kmp_version_lock[] = 690b57cec5SDimitry Andric KMP_VERSION_PREFIX "lock type: run time selectable"; 700b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric #if KMP_USE_MONITOR 770b57cec5SDimitry Andric kmp_info_t __kmp_monitor; 780b57cec5SDimitry Andric #endif 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /* Forward declarations */ 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric void __kmp_cleanup(void); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, 850b57cec5SDimitry Andric int gtid); 860b57cec5SDimitry Andric static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 870b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 880b57cec5SDimitry Andric ident_t *loc); 890b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 900b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, 910b57cec5SDimitry Andric int update_master_only = 0); 920b57cec5SDimitry Andric #endif 930b57cec5SDimitry Andric static void __kmp_do_serial_initialize(void); 940b57cec5SDimitry Andric void __kmp_fork_barrier(int gtid, int tid); 950b57cec5SDimitry Andric void __kmp_join_barrier(int gtid); 960b57cec5SDimitry Andric void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, 970b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, ident_t *loc); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 1000b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); 1010b57cec5SDimitry Andric #endif 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed); 1040b57cec5SDimitry Andric #if KMP_OS_WINDOWS 1050b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid); 1060b57cec5SDimitry Andric #endif 1070b57cec5SDimitry Andric static void __kmp_reap_thread(kmp_info_t *thread, int is_root); 1080b57cec5SDimitry Andric kmp_info_t 
*__kmp_thread_pool_insert_pt = NULL; 1090b57cec5SDimitry Andric 110349cc55cSDimitry Andric void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, 111349cc55cSDimitry Andric int new_nthreads); 112349cc55cSDimitry Andric void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); 113349cc55cSDimitry Andric 1140b57cec5SDimitry Andric /* Calculate the identifier of the current thread */ 1150b57cec5SDimitry Andric /* fast (and somewhat portable) way to get unique identifier of executing 1160b57cec5SDimitry Andric thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ 1170b57cec5SDimitry Andric int __kmp_get_global_thread_id() { 1180b57cec5SDimitry Andric int i; 1190b57cec5SDimitry Andric kmp_info_t **other_threads; 1200b57cec5SDimitry Andric size_t stack_data; 1210b57cec5SDimitry Andric char *stack_addr; 1220b57cec5SDimitry Andric size_t stack_size; 1230b57cec5SDimitry Andric char *stack_base; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric KA_TRACE( 1260b57cec5SDimitry Andric 1000, 1270b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", 1280b57cec5SDimitry Andric __kmp_nth, __kmp_all_nth)); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to 1310b57cec5SDimitry Andric a parallel region, made it return KMP_GTID_DNE to force serial_initialize 1320b57cec5SDimitry Andric by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee 1330b57cec5SDimitry Andric __kmp_init_gtid for this to work. 
*/ 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric if (!TCR_4(__kmp_init_gtid)) 1360b57cec5SDimitry Andric return KMP_GTID_DNE; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 1390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 1400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); 1410b57cec5SDimitry Andric return __kmp_gtid; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric #endif 1440b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 1450b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); 1460b57cec5SDimitry Andric return __kmp_gtid_get_specific(); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric stack_addr = (char *)&stack_data; 1510b57cec5SDimitry Andric other_threads = __kmp_threads; 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric /* ATT: The code below is a source of potential bugs due to unsynchronized 1540b57cec5SDimitry Andric access to __kmp_threads array. For example: 1550b57cec5SDimitry Andric 1. Current thread loads other_threads[i] to thr and checks it, it is 1560b57cec5SDimitry Andric non-NULL. 1570b57cec5SDimitry Andric 2. Current thread is suspended by OS. 1580b57cec5SDimitry Andric 3. Another thread unregisters and finishes (debug versions of free() 1590b57cec5SDimitry Andric may fill memory with something like 0xEF). 1600b57cec5SDimitry Andric 4. Current thread is resumed. 1610b57cec5SDimitry Andric 5. Current thread reads junk from *thr. 1620b57cec5SDimitry Andric TODO: Fix it. 
--ln */ 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); 1670b57cec5SDimitry Andric if (!thr) 1680b57cec5SDimitry Andric continue; 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); 1710b57cec5SDimitry Andric stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric /* stack grows down -- search through all of the active threads */ 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (stack_addr <= stack_base) { 1760b57cec5SDimitry Andric size_t stack_diff = stack_base - stack_addr; 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric if (stack_diff <= stack_size) { 1790b57cec5SDimitry Andric /* The only way we can be closer than the allocated */ 1800b57cec5SDimitry Andric /* stack size is if we are running on this thread. */ 1810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); 1820b57cec5SDimitry Andric return i; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* get specific to try and determine our gtid */ 1880b57cec5SDimitry Andric KA_TRACE(1000, 1890b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: internal alg. 
failed to find " 1900b57cec5SDimitry Andric "thread, using TLS\n")); 1910b57cec5SDimitry Andric i = __kmp_gtid_get_specific(); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric /* if we havn't been assigned a gtid, then return code */ 1960b57cec5SDimitry Andric if (i < 0) 1970b57cec5SDimitry Andric return i; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /* dynamically updated stack window for uber threads to avoid get_specific 2000b57cec5SDimitry Andric call */ 2010b57cec5SDimitry Andric if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { 2020b57cec5SDimitry Andric KMP_FATAL(StackOverflow, i); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2060b57cec5SDimitry Andric if (stack_addr > stack_base) { 2070b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 2080b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2090b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - 2100b57cec5SDimitry Andric stack_base); 2110b57cec5SDimitry Andric } else { 2120b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 2130b57cec5SDimitry Andric stack_base - stack_addr); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric /* Reprint stack bounds for ubermaster since they have been refined */ 2170b57cec5SDimitry Andric if (__kmp_storage_map) { 2180b57cec5SDimitry Andric char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2190b57cec5SDimitry Andric char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 2200b57cec5SDimitry Andric __kmp_print_storage_map_gtid(i, stack_beg, stack_end, 2210b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize, 2220b57cec5SDimitry Andric 
"th_%d stack (refinement)", i); 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric return i; 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric int __kmp_get_global_thread_id_reg() { 2280b57cec5SDimitry Andric int gtid; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric if (!__kmp_init_serial) { 2310b57cec5SDimitry Andric gtid = KMP_GTID_DNE; 2320b57cec5SDimitry Andric } else 2330b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2340b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 2350b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); 2360b57cec5SDimitry Andric gtid = __kmp_gtid; 2370b57cec5SDimitry Andric } else 2380b57cec5SDimitry Andric #endif 2390b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 2400b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); 2410b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2420b57cec5SDimitry Andric } else { 2430b57cec5SDimitry Andric KA_TRACE(1000, 2440b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); 2450b57cec5SDimitry Andric gtid = __kmp_get_global_thread_id(); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /* we must be a new uber master sibling thread */ 2490b57cec5SDimitry Andric if (gtid == KMP_GTID_DNE) { 2500b57cec5SDimitry Andric KA_TRACE(10, 2510b57cec5SDimitry Andric ("__kmp_get_global_thread_id_reg: Encountered new root thread. 
" 2520b57cec5SDimitry Andric "Registering a new gtid.\n")); 2530b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 2540b57cec5SDimitry Andric if (!__kmp_init_serial) { 2550b57cec5SDimitry Andric __kmp_do_serial_initialize(); 2560b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2570b57cec5SDimitry Andric } else { 2580b57cec5SDimitry Andric gtid = __kmp_register_root(FALSE); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 2610b57cec5SDimitry Andric /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric return gtid; 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric /* caller must hold forkjoin_lock */ 2700b57cec5SDimitry Andric void __kmp_check_stack_overlap(kmp_info_t *th) { 2710b57cec5SDimitry Andric int f; 2720b57cec5SDimitry Andric char *stack_beg = NULL; 2730b57cec5SDimitry Andric char *stack_end = NULL; 2740b57cec5SDimitry Andric int gtid; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); 2770b57cec5SDimitry Andric if (__kmp_storage_map) { 2780b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2790b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric if (gtid == KMP_GTID_MONITOR) { 2840b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2850b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2860b57cec5SDimitry Andric "th_%s stack (%s)", "mon", 2870b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); 2880b57cec5SDimitry Andric } else { 2890b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2900b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2910b57cec5SDimitry Andric "th_%d stack (%s)", gtid, 2920b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric /* No point in checking ubermaster threads since they use refinement and 2970b57cec5SDimitry Andric * cannot overlap */ 2980b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2990b57cec5SDimitry Andric if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 3000b57cec5SDimitry Andric KA_TRACE(10, 3010b57cec5SDimitry Andric ("__kmp_check_stack_overlap: performing extensive checking\n")); 3020b57cec5SDimitry Andric if (stack_beg == NULL) { 3030b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 3040b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 3080b57cec5SDimitry Andric kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric if (f_th && f_th != th) { 3110b57cec5SDimitry Andric char *other_stack_end = 3120b57cec5SDimitry Andric (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 3130b57cec5SDimitry Andric char *other_stack_beg = 3140b57cec5SDimitry Andric other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 3150b57cec5SDimitry Andric if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 3160b57cec5SDimitry Andric (stack_end > other_stack_beg && stack_end < other_stack_end)) { 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric /* Print the other stack values before the abort */ 3190b57cec5SDimitry Andric if (__kmp_storage_map) 
3200b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 3210b57cec5SDimitry Andric -1, other_stack_beg, other_stack_end, 3220b57cec5SDimitry Andric (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 3230b57cec5SDimitry Andric "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), 3260b57cec5SDimitry Andric __kmp_msg_null); 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric void __kmp_infinite_loop(void) { 3370b57cec5SDimitry Andric static int done = FALSE; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric while (!done) { 3400b57cec5SDimitry Andric KMP_YIELD(TRUE); 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric #define MAX_MESSAGE 512 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 3470b57cec5SDimitry Andric char const *format, ...) 
{ 3480b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3490b57cec5SDimitry Andric va_list ap; 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric va_start(ap, format); 3520b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 3530b57cec5SDimitry Andric p2, (unsigned long)size, format); 3540b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 3550b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 3560b57cec5SDimitry Andric #if KMP_PRINT_DATA_PLACEMENT 3570b57cec5SDimitry Andric int node; 3580b57cec5SDimitry Andric if (gtid >= 0) { 3590b57cec5SDimitry Andric if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 3600b57cec5SDimitry Andric if (__kmp_storage_map_verbose) { 3610b57cec5SDimitry Andric node = __kmp_get_host_node(p1); 3620b57cec5SDimitry Andric if (node < 0) /* doesn't work, so don't try this next time */ 3630b57cec5SDimitry Andric __kmp_storage_map_verbose = FALSE; 3640b57cec5SDimitry Andric else { 3650b57cec5SDimitry Andric char *last; 3660b57cec5SDimitry Andric int lastNode; 3670b57cec5SDimitry Andric int localProc = __kmp_get_cpu_from_gtid(gtid); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric const int page_size = KMP_GET_PAGE_SIZE(); 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 3720b57cec5SDimitry Andric p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 3730b57cec5SDimitry Andric if (localProc >= 0) 3740b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 3750b57cec5SDimitry Andric localProc >> 1); 3760b57cec5SDimitry Andric else 3770b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d\n", gtid); 3780b57cec5SDimitry Andric #if KMP_USE_PRCTL 3790b57cec5SDimitry Andric /* The more elaborate format is disabled for now because of the prctl 3800b57cec5SDimitry Andric * hanging bug. 
*/ 3810b57cec5SDimitry Andric do { 3820b57cec5SDimitry Andric last = p1; 3830b57cec5SDimitry Andric lastNode = node; 3840b57cec5SDimitry Andric /* This loop collates adjacent pages with the same host node. */ 3850b57cec5SDimitry Andric do { 3860b57cec5SDimitry Andric (char *)p1 += page_size; 3870b57cec5SDimitry Andric } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 3880b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 3890b57cec5SDimitry Andric lastNode); 3900b57cec5SDimitry Andric } while (p1 <= p2); 3910b57cec5SDimitry Andric #else 3920b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 3930b57cec5SDimitry Andric (char *)p1 + (page_size - 1), 3940b57cec5SDimitry Andric __kmp_get_host_node(p1)); 3950b57cec5SDimitry Andric if (p1 < p2) { 3960b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 3970b57cec5SDimitry Andric (char *)p2 + (page_size - 1), 3980b57cec5SDimitry Andric __kmp_get_host_node(p2)); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric #endif 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric } else 4040b57cec5SDimitry Andric __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric #endif /* KMP_PRINT_DATA_PLACEMENT */ 4070b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric void __kmp_warn(char const *format, ...) 
{ 4110b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 4120b57cec5SDimitry Andric va_list ap; 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric if (__kmp_generate_warnings == kmp_warnings_off) { 4150b57cec5SDimitry Andric return; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric va_start(ap, format); 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 4210b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 4220b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 4230b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric va_end(ap); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric void __kmp_abort_process() { 4290b57cec5SDimitry Andric // Later threads may stall here, but that's ok because abort() will kill them. 4300b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric if (__kmp_debug_buf) { 4330b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric if (KMP_OS_WINDOWS) { 4370b57cec5SDimitry Andric // Let other threads know of abnormal termination and prevent deadlock 4380b57cec5SDimitry Andric // if abort happened during library initialization or shutdown 4390b57cec5SDimitry Andric __kmp_global.g.g_abort = SIGABRT; 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric /* On Windows* OS by default abort() causes pop-up error box, which stalls 4420b57cec5SDimitry Andric nightly testing. Unfortunately, we cannot reliably suppress pop-up error 4430b57cec5SDimitry Andric boxes. _set_abort_behavior() works well, but this function is not 4440b57cec5SDimitry Andric available in VS7 (this is not problem for DLL, but it is a problem for 4450b57cec5SDimitry Andric static OpenMP RTL). 
SetErrorMode (and so, timelimit utility) does not 4460b57cec5SDimitry Andric help, at least in some versions of MS C RTL. 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric It seems following sequence is the only way to simulate abort() and 4490b57cec5SDimitry Andric avoid pop-up error box. */ 4500b57cec5SDimitry Andric raise(SIGABRT); 4510b57cec5SDimitry Andric _exit(3); // Just in case, if signal ignored, exit anyway. 4520b57cec5SDimitry Andric } else { 453e8d8bef9SDimitry Andric __kmp_unregister_library(); 4540b57cec5SDimitry Andric abort(); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric __kmp_infinite_loop(); 4580b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_exit_lock); 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric } // __kmp_abort_process 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric void __kmp_abort_thread(void) { 4630b57cec5SDimitry Andric // TODO: Eliminate g_abort global variable and this function. 4640b57cec5SDimitry Andric // In case of abort just call abort(), it will kill all the threads. 4650b57cec5SDimitry Andric __kmp_infinite_loop(); 4660b57cec5SDimitry Andric } // __kmp_abort_thread 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric /* Print out the storage map for the major kmp_info_t thread data structures 4690b57cec5SDimitry Andric that are allocated together. 
*/ 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 4720b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 4730b57cec5SDimitry Andric gtid); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 4760b57cec5SDimitry Andric sizeof(kmp_desc_t), "th_%d.th_info", gtid); 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 4790b57cec5SDimitry Andric sizeof(kmp_local_t), "th_%d.th_local", gtid); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 4820b57cec5SDimitry Andric gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 4830b57cec5SDimitry Andric sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 4860b57cec5SDimitry Andric &thr->th.th_bar[bs_plain_barrier + 1], 4870b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 4880b57cec5SDimitry Andric gtid); 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 4910b57cec5SDimitry Andric &thr->th.th_bar[bs_forkjoin_barrier + 1], 4920b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 4930b57cec5SDimitry Andric gtid); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 4960b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 4970b57cec5SDimitry Andric &thr->th.th_bar[bs_reduction_barrier + 1], 4980b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 4990b57cec5SDimitry Andric gtid); 5000b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 5010b57cec5SDimitry Andric } 
5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric /* Print out the storage map for the major kmp_team_t team data structures 5040b57cec5SDimitry Andric that are allocated together. */ 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, 5070b57cec5SDimitry Andric int team_id, int num_thr) { 5080b57cec5SDimitry Andric int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 5090b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 5100b57cec5SDimitry Andric header, team_id); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], 5130b57cec5SDimitry Andric &team->t.t_bar[bs_last_barrier], 5140b57cec5SDimitry Andric sizeof(kmp_balign_team_t) * bs_last_barrier, 5150b57cec5SDimitry Andric "%s_%d.t_bar", header, team_id); 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], 5180b57cec5SDimitry Andric &team->t.t_bar[bs_plain_barrier + 1], 5190b57cec5SDimitry Andric sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", 5200b57cec5SDimitry Andric header, team_id); 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], 5230b57cec5SDimitry Andric &team->t.t_bar[bs_forkjoin_barrier + 1], 5240b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5250b57cec5SDimitry Andric "%s_%d.t_bar[forkjoin]", header, team_id); 5260b57cec5SDimitry Andric 5270b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 5280b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], 5290b57cec5SDimitry Andric &team->t.t_bar[bs_reduction_barrier + 1], 5300b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5310b57cec5SDimitry Andric "%s_%d.t_bar[reduction]", header, team_id); 5320b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 
5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5350b57cec5SDimitry Andric -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], 5360b57cec5SDimitry Andric sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5390b57cec5SDimitry Andric -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], 5400b57cec5SDimitry Andric sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], 5430b57cec5SDimitry Andric &team->t.t_disp_buffer[num_disp_buff], 5440b57cec5SDimitry Andric sizeof(dispatch_shared_info_t) * num_disp_buff, 5450b57cec5SDimitry Andric "%s_%d.t_disp_buffer", header, team_id); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 548fe6060f1SDimitry Andric static void __kmp_init_allocator() { 549fe6060f1SDimitry Andric __kmp_init_memkind(); 550fe6060f1SDimitry Andric __kmp_init_target_mem(); 551fe6060f1SDimitry Andric } 5520b57cec5SDimitry Andric static void __kmp_fini_allocator() { __kmp_fini_memkind(); } 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric #if KMP_DYNAMIC_LIB 5570b57cec5SDimitry Andric #if KMP_OS_WINDOWS 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) { 5600b57cec5SDimitry Andric //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric switch (fdwReason) { 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric case DLL_PROCESS_ATTACH: 5650b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n")); 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric return TRUE; 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric 
case DLL_PROCESS_DETACH: 5700b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific())); 5710b57cec5SDimitry Andric 572fe6060f1SDimitry Andric // According to Windows* documentation for DllMain entry point: 573fe6060f1SDimitry Andric // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference: 574fe6060f1SDimitry Andric // lpReserved == NULL when FreeLibrary() is called, 575fe6060f1SDimitry Andric // lpReserved != NULL when the process is terminated. 576fe6060f1SDimitry Andric // When FreeLibrary() is called, worker threads remain alive. So the 577fe6060f1SDimitry Andric // runtime's state is consistent and executing proper shutdown is OK. 578fe6060f1SDimitry Andric // When the process is terminated, worker threads have exited or been 579fe6060f1SDimitry Andric // forcefully terminated by the OS and only the shutdown thread remains. 580fe6060f1SDimitry Andric // This can leave the runtime in an inconsistent state. 581fe6060f1SDimitry Andric // Hence, only attempt proper cleanup when FreeLibrary() is called. 582fe6060f1SDimitry Andric // Otherwise, rely on OS to reclaim resources. 
583fe6060f1SDimitry Andric if (lpReserved == NULL) 5840b57cec5SDimitry Andric __kmp_internal_end_library(__kmp_gtid_get_specific()); 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric return TRUE; 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric case DLL_THREAD_ATTACH: 5890b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: THREAD_ATTACH\n")); 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric /* if we want to register new siblings all the time here call 5920b57cec5SDimitry Andric * __kmp_get_gtid(); */ 5930b57cec5SDimitry Andric return TRUE; 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric case DLL_THREAD_DETACH: 5960b57cec5SDimitry Andric KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific())); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric __kmp_internal_end_thread(__kmp_gtid_get_specific()); 5990b57cec5SDimitry Andric return TRUE; 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric return TRUE; 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 6060b57cec5SDimitry Andric #endif /* KMP_DYNAMIC_LIB */ 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric /* __kmp_parallel_deo -- Wait until it's our turn. 
*/ 6090b57cec5SDimitry Andric void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 6100b57cec5SDimitry Andric int gtid = *gtid_ref; 6110b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6120b57cec5SDimitry Andric kmp_team_t *team = __kmp_team_from_gtid(gtid); 6130b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6140b57cec5SDimitry Andric 6150b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 6160b57cec5SDimitry Andric if (__kmp_threads[gtid]->th.th_root->r.r_active) 6170b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 6180b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); 6190b57cec5SDimitry Andric #else 6200b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); 6210b57cec5SDimitry Andric #endif 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6240b57cec5SDimitry Andric if (!team->t.t_serialized) { 6250b57cec5SDimitry Andric KMP_MB(); 6260b57cec5SDimitry Andric KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ, 6270b57cec5SDimitry Andric NULL); 6280b57cec5SDimitry Andric KMP_MB(); 6290b57cec5SDimitry Andric } 6300b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric /* __kmp_parallel_dxo -- Signal the next task. 
*/ 6340b57cec5SDimitry Andric void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 6350b57cec5SDimitry Andric int gtid = *gtid_ref; 6360b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6370b57cec5SDimitry Andric int tid = __kmp_tid_from_gtid(gtid); 6380b57cec5SDimitry Andric kmp_team_t *team = __kmp_team_from_gtid(gtid); 6390b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 6420b57cec5SDimitry Andric if (__kmp_threads[gtid]->th.th_root->r.r_active) 6430b57cec5SDimitry Andric __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref); 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6460b57cec5SDimitry Andric if (!team->t.t_serialized) { 6470b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric /* use the tid of the next thread in this team */ 6500b57cec5SDimitry Andric /* TODO replace with general release procedure */ 6510b57cec5SDimitry Andric team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 6590b57cec5SDimitry Andric /* The BARRIER for a SINGLE process section is always explicit */ 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) { 6620b57cec5SDimitry Andric int status; 6630b57cec5SDimitry Andric kmp_info_t *th; 6640b57cec5SDimitry Andric kmp_team_t *team; 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 6670b57cec5SDimitry Andric __kmp_parallel_initialize(); 6680b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric th = __kmp_threads[gtid]; 6710b57cec5SDimitry Andric team = th->th.th_team; 6720b57cec5SDimitry Andric status = 0; 6730b57cec5SDimitry Andric 6740b57cec5SDimitry Andric th->th.th_ident = id_ref; 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric if (team->t.t_serialized) { 6770b57cec5SDimitry Andric status = 1; 6780b57cec5SDimitry Andric } else { 6790b57cec5SDimitry Andric kmp_int32 old_this = th->th.th_local.this_construct; 6800b57cec5SDimitry Andric 6810b57cec5SDimitry Andric ++th->th.th_local.this_construct; 6820b57cec5SDimitry Andric /* try to set team count to thread count--success means thread got the 6830b57cec5SDimitry Andric single block */ 6840b57cec5SDimitry Andric /* TODO: Should this be acquire or release? 
*/ 6850b57cec5SDimitry Andric if (team->t.t_construct == old_this) { 6860b57cec5SDimitry Andric status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this, 6870b57cec5SDimitry Andric th->th.th_local.this_construct); 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric #if USE_ITT_BUILD 6900b57cec5SDimitry Andric if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && 6910b57cec5SDimitry Andric KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL && 692fe6060f1SDimitry Andric team->t.t_active_level == 1) { 693fe6060f1SDimitry Andric // Only report metadata by primary thread of active team at level 1 6940b57cec5SDimitry Andric __kmp_itt_metadata_single(id_ref); 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 7000b57cec5SDimitry Andric if (status && push_ws) { 7010b57cec5SDimitry Andric __kmp_push_workshare(gtid, ct_psingle, id_ref); 7020b57cec5SDimitry Andric } else { 7030b57cec5SDimitry Andric __kmp_check_workshare(gtid, ct_psingle, id_ref); 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric #if USE_ITT_BUILD 7070b57cec5SDimitry Andric if (status) { 7080b57cec5SDimitry Andric __kmp_itt_single_start(gtid); 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 7110b57cec5SDimitry Andric return status; 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric void __kmp_exit_single(int gtid) { 7150b57cec5SDimitry Andric #if USE_ITT_BUILD 7160b57cec5SDimitry Andric __kmp_itt_single_end(gtid); 7170b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 7180b57cec5SDimitry Andric if (__kmp_env_consistency_check) 7190b57cec5SDimitry Andric __kmp_pop_workshare(gtid, ct_psingle, NULL); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric /* determine if we can go parallel or must use a 
serialized parallel region and 7230b57cec5SDimitry Andric * how many threads we can use 7240b57cec5SDimitry Andric * set_nproc is the number of threads requested for the team 7250b57cec5SDimitry Andric * returns 0 if we should serialize or only use one thread, 7260b57cec5SDimitry Andric * otherwise the number of threads to use 7270b57cec5SDimitry Andric * The forkjoin lock is held by the caller. */ 7280b57cec5SDimitry Andric static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, 7290b57cec5SDimitry Andric int master_tid, int set_nthreads, 7300b57cec5SDimitry Andric int enter_teams) { 7310b57cec5SDimitry Andric int capacity; 7320b57cec5SDimitry Andric int new_nthreads; 7330b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 7340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root && parent_team); 7350b57cec5SDimitry Andric kmp_info_t *this_thr = parent_team->t.t_threads[master_tid]; 7360b57cec5SDimitry Andric 7370b57cec5SDimitry Andric // If dyn-var is set, dynamically adjust the number of desired threads, 7380b57cec5SDimitry Andric // according to the method specified by dynamic_mode. 
7390b57cec5SDimitry Andric new_nthreads = set_nthreads; 7400b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid)) { 7410b57cec5SDimitry Andric ; 7420b57cec5SDimitry Andric } 7430b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 7440b57cec5SDimitry Andric else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { 7450b57cec5SDimitry Andric new_nthreads = __kmp_load_balance_nproc(root, set_nthreads); 7460b57cec5SDimitry Andric if (new_nthreads == 1) { 7470b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 7480b57cec5SDimitry Andric "reservation to 1 thread\n", 7490b57cec5SDimitry Andric master_tid)); 7500b57cec5SDimitry Andric return 1; 7510b57cec5SDimitry Andric } 7520b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7530b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 7540b57cec5SDimitry Andric "reservation to %d threads\n", 7550b57cec5SDimitry Andric master_tid, new_nthreads)); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric } 7580b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 7590b57cec5SDimitry Andric else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { 7600b57cec5SDimitry Andric new_nthreads = __kmp_avail_proc - __kmp_nth + 7610b57cec5SDimitry Andric (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc); 7620b57cec5SDimitry Andric if (new_nthreads <= 1) { 7630b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 7640b57cec5SDimitry Andric "reservation to 1 thread\n", 7650b57cec5SDimitry Andric master_tid)); 7660b57cec5SDimitry Andric return 1; 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7690b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 7700b57cec5SDimitry Andric "reservation to %d threads\n", 7710b57cec5SDimitry Andric master_tid, new_nthreads)); 7720b57cec5SDimitry Andric } else { 7730b57cec5SDimitry Andric new_nthreads = set_nthreads; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { 7760b57cec5SDimitry Andric if (set_nthreads > 2) { 7770b57cec5SDimitry Andric new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]); 7780b57cec5SDimitry Andric new_nthreads = (new_nthreads % set_nthreads) + 1; 7790b57cec5SDimitry Andric if (new_nthreads == 1) { 7800b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 7810b57cec5SDimitry Andric "reservation to 1 thread\n", 7820b57cec5SDimitry Andric master_tid)); 7830b57cec5SDimitry Andric return 1; 7840b57cec5SDimitry Andric } 7850b57cec5SDimitry Andric if (new_nthreads < set_nthreads) { 7860b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 7870b57cec5SDimitry Andric "reservation to %d threads\n", 7880b57cec5SDimitry Andric master_tid, new_nthreads)); 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric } else { 7920b57cec5SDimitry Andric KMP_ASSERT(0); 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT. 
7960b57cec5SDimitry Andric if (__kmp_nth + new_nthreads - 7970b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 7980b57cec5SDimitry Andric __kmp_max_nth) { 7990b57cec5SDimitry Andric int tl_nthreads = __kmp_max_nth - __kmp_nth + 8000b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 8010b57cec5SDimitry Andric if (tl_nthreads <= 0) { 8020b57cec5SDimitry Andric tl_nthreads = 1; 8030b57cec5SDimitry Andric } 8040b57cec5SDimitry Andric 8050b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 8060b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8070b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8080b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8090b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), 8100b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 8110b57cec5SDimitry Andric } 8120b57cec5SDimitry Andric if (tl_nthreads == 1) { 8130b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT " 8140b57cec5SDimitry Andric "reduced reservation to 1 thread\n", 8150b57cec5SDimitry Andric master_tid)); 8160b57cec5SDimitry Andric return 1; 8170b57cec5SDimitry Andric } 8180b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced " 8190b57cec5SDimitry Andric "reservation to %d threads\n", 8200b57cec5SDimitry Andric master_tid, tl_nthreads)); 8210b57cec5SDimitry Andric new_nthreads = tl_nthreads; 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric // Respect OMP_THREAD_LIMIT 8250b57cec5SDimitry Andric int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads; 8260b57cec5SDimitry Andric int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit; 8270b57cec5SDimitry Andric if (cg_nthreads + new_nthreads - 8280b57cec5SDimitry Andric (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc) > 8290b57cec5SDimitry Andric max_cg_threads) { 8300b57cec5SDimitry Andric int tl_nthreads = max_cg_threads - cg_nthreads + 8310b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 8320b57cec5SDimitry Andric if (tl_nthreads <= 0) { 8330b57cec5SDimitry Andric tl_nthreads = 1; 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 8370b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8380b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8390b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8400b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), 8410b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 8420b57cec5SDimitry Andric } 8430b57cec5SDimitry Andric if (tl_nthreads == 1) { 8440b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT " 8450b57cec5SDimitry Andric "reduced reservation to 1 thread\n", 8460b57cec5SDimitry Andric master_tid)); 8470b57cec5SDimitry Andric return 1; 8480b57cec5SDimitry Andric } 8490b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced " 8500b57cec5SDimitry Andric "reservation to %d threads\n", 8510b57cec5SDimitry Andric master_tid, tl_nthreads)); 8520b57cec5SDimitry Andric new_nthreads = tl_nthreads; 8530b57cec5SDimitry Andric } 8540b57cec5SDimitry Andric 8550b57cec5SDimitry Andric // Check if the threads array is large enough, or needs expanding. 8560b57cec5SDimitry Andric // See comment in __kmp_register_root() about the adjustment if 8570b57cec5SDimitry Andric // __kmp_threads[0] == NULL. 
8580b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 8590b57cec5SDimitry Andric if (TCR_PTR(__kmp_threads[0]) == NULL) { 8600b57cec5SDimitry Andric --capacity; 8610b57cec5SDimitry Andric } 862d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 863d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 864d409305fSDimitry Andric // in __kmp_threads_capacity. 865d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 866d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 867d409305fSDimitry Andric } 8680b57cec5SDimitry Andric if (__kmp_nth + new_nthreads - 8690b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 8700b57cec5SDimitry Andric capacity) { 8710b57cec5SDimitry Andric // Expand the threads array. 8720b57cec5SDimitry Andric int slotsRequired = __kmp_nth + new_nthreads - 8730b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) - 8740b57cec5SDimitry Andric capacity; 8750b57cec5SDimitry Andric int slotsAdded = __kmp_expand_threads(slotsRequired); 8760b57cec5SDimitry Andric if (slotsAdded < slotsRequired) { 8770b57cec5SDimitry Andric // The threads array was not expanded enough. 8780b57cec5SDimitry Andric new_nthreads -= (slotsRequired - slotsAdded); 8790b57cec5SDimitry Andric KMP_ASSERT(new_nthreads >= 1); 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric // If dyn-var is false, emit a 1-time warning. 
8820b57cec5SDimitry Andric if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 8830b57cec5SDimitry Andric __kmp_reserve_warn = 1; 8840b57cec5SDimitry Andric if (__kmp_tp_cached) { 8850b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8860b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 8870b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 8880b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 8890b57cec5SDimitry Andric } else { 8900b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 8910b57cec5SDimitry Andric KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 8920b57cec5SDimitry Andric KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); 8930b57cec5SDimitry Andric } 8940b57cec5SDimitry Andric } 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric } 8970b57cec5SDimitry Andric 8980b57cec5SDimitry Andric #ifdef KMP_DEBUG 8990b57cec5SDimitry Andric if (new_nthreads == 1) { 9000b57cec5SDimitry Andric KC_TRACE(10, 9010b57cec5SDimitry Andric ("__kmp_reserve_threads: T#%d serializing team after reclaiming " 9020b57cec5SDimitry Andric "dead roots and rechecking; requested %d threads\n", 9030b57cec5SDimitry Andric __kmp_get_gtid(), set_nthreads)); 9040b57cec5SDimitry Andric } else { 9050b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested" 9060b57cec5SDimitry Andric " %d threads\n", 9070b57cec5SDimitry Andric __kmp_get_gtid(), new_nthreads, set_nthreads)); 9080b57cec5SDimitry Andric } 9090b57cec5SDimitry Andric #endif // KMP_DEBUG 9100b57cec5SDimitry Andric return new_nthreads; 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric /* Allocate threads from the thread pool and assign them to the new team. 
We are 9140b57cec5SDimitry Andric assured that there are enough threads available, because we checked on that 9150b57cec5SDimitry Andric earlier within critical section forkjoin */ 9160b57cec5SDimitry Andric static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, 917349cc55cSDimitry Andric kmp_info_t *master_th, int master_gtid, 918349cc55cSDimitry Andric int fork_teams_workers) { 9190b57cec5SDimitry Andric int i; 9200b57cec5SDimitry Andric int use_hot_team; 9210b57cec5SDimitry Andric 9220b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc)); 9230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid()); 9240b57cec5SDimitry Andric KMP_MB(); 9250b57cec5SDimitry Andric 926fe6060f1SDimitry Andric /* first, let's setup the primary thread */ 9270b57cec5SDimitry Andric master_th->th.th_info.ds.ds_tid = 0; 9280b57cec5SDimitry Andric master_th->th.th_team = team; 9290b57cec5SDimitry Andric master_th->th.th_team_nproc = team->t.t_nproc; 9300b57cec5SDimitry Andric master_th->th.th_team_master = master_th; 9310b57cec5SDimitry Andric master_th->th.th_team_serialized = FALSE; 9320b57cec5SDimitry Andric master_th->th.th_dispatch = &team->t.t_dispatch[0]; 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric /* make sure we are not the optimized hot team */ 9350b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 9360b57cec5SDimitry Andric use_hot_team = 0; 9370b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; 9380b57cec5SDimitry Andric if (hot_teams) { // hot teams array is not allocated if 9390b57cec5SDimitry Andric // KMP_HOT_TEAMS_MAX_LEVEL=0 9400b57cec5SDimitry Andric int level = team->t.t_active_level - 1; // index in array of hot teams 9410b57cec5SDimitry Andric if (master_th->th.th_teams_microtask) { // are we inside the teams? 
9420b57cec5SDimitry Andric if (master_th->th.th_teams_size.nteams > 1) { 9430b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 9440b57cec5SDimitry Andric // team_of_masters 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 9470b57cec5SDimitry Andric master_th->th.th_teams_level == team->t.t_level) { 9480b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 9490b57cec5SDimitry Andric // team_of_workers before the parallel 9500b57cec5SDimitry Andric } // team->t.t_level will be increased inside parallel 9510b57cec5SDimitry Andric } 9520b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level) { 9530b57cec5SDimitry Andric if (hot_teams[level].hot_team) { 9540b57cec5SDimitry Andric // hot team has already been allocated for given level 9550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); 9560b57cec5SDimitry Andric use_hot_team = 1; // the team is ready to use 9570b57cec5SDimitry Andric } else { 9580b57cec5SDimitry Andric use_hot_team = 0; // AC: threads are not allocated yet 9590b57cec5SDimitry Andric hot_teams[level].hot_team = team; // remember new hot team 9600b57cec5SDimitry Andric hot_teams[level].hot_team_nth = team->t.t_nproc; 9610b57cec5SDimitry Andric } 9620b57cec5SDimitry Andric } else { 9630b57cec5SDimitry Andric use_hot_team = 0; 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric } 9660b57cec5SDimitry Andric #else 9670b57cec5SDimitry Andric use_hot_team = team == root->r.r_hot_team; 9680b57cec5SDimitry Andric #endif 9690b57cec5SDimitry Andric if (!use_hot_team) { 9700b57cec5SDimitry Andric 971fe6060f1SDimitry Andric /* install the primary thread */ 9720b57cec5SDimitry Andric team->t.t_threads[0] = master_th; 9730b57cec5SDimitry Andric __kmp_initialize_info(master_th, team, 0, master_gtid); 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric /* now, install the worker threads */ 9760b57cec5SDimitry 
Andric for (i = 1; i < team->t.t_nproc; i++) { 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric /* fork or reallocate a new thread and install it in team */ 9790b57cec5SDimitry Andric kmp_info_t *thr = __kmp_allocate_thread(root, team, i); 9800b57cec5SDimitry Andric team->t.t_threads[i] = thr; 9810b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr); 9820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr->th.th_team == team); 9830b57cec5SDimitry Andric /* align team and thread arrived states */ 9840b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived " 9850b57cec5SDimitry Andric "T#%d(%d:%d) join =%llu, plain=%llu\n", 9860b57cec5SDimitry Andric __kmp_gtid_from_tid(0, team), team->t.t_id, 0, 9870b57cec5SDimitry Andric __kmp_gtid_from_tid(i, team), team->t.t_id, i, 9880b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 9890b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 9900b57cec5SDimitry Andric thr->th.th_teams_microtask = master_th->th.th_teams_microtask; 9910b57cec5SDimitry Andric thr->th.th_teams_level = master_th->th.th_teams_level; 9920b57cec5SDimitry Andric thr->th.th_teams_size = master_th->th.th_teams_size; 9930b57cec5SDimitry Andric { // Initialize threads' barrier data. 
9940b57cec5SDimitry Andric int b; 9950b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar; 9960b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 9970b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 9980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 9990b57cec5SDimitry Andric #if USE_DEBUGGER 10000b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 10010b57cec5SDimitry Andric #endif 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric } 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric 10060b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 1007349cc55cSDimitry Andric // Do not partition the places list for teams construct workers who 1008349cc55cSDimitry Andric // haven't actually been forked to do real work yet. This partitioning 1009349cc55cSDimitry Andric // will take place in the parallel region nested within the teams construct. 
1010349cc55cSDimitry Andric if (!fork_teams_workers) { 10110b57cec5SDimitry Andric __kmp_partition_places(team); 1012349cc55cSDimitry Andric } 10130b57cec5SDimitry Andric #endif 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric if (__kmp_display_affinity && team->t.t_display_affinity != 1) { 10170b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; i++) { 10180b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[i]; 10190b57cec5SDimitry Andric if (thr->th.th_prev_num_threads != team->t.t_nproc || 10200b57cec5SDimitry Andric thr->th.th_prev_level != team->t.t_level) { 10210b57cec5SDimitry Andric team->t.t_display_affinity = 1; 10220b57cec5SDimitry Andric break; 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric } 10250b57cec5SDimitry Andric } 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric KMP_MB(); 10280b57cec5SDimitry Andric } 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 10310b57cec5SDimitry Andric // Propagate any changes to the floating point control registers out to the team 10320b57cec5SDimitry Andric // We try to avoid unnecessary writes to the relevant cache line in the team 10330b57cec5SDimitry Andric // structure, so we don't make changes unless they are needed. 10340b57cec5SDimitry Andric inline static void propagateFPControl(kmp_team_t *team) { 10350b57cec5SDimitry Andric if (__kmp_inherit_fp_control) { 10360b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 10370b57cec5SDimitry Andric kmp_uint32 mxcsr; 10380b57cec5SDimitry Andric 1039fe6060f1SDimitry Andric // Get primary thread's values of FPU control flags (both X87 and vector) 10400b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 10410b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 10420b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric // There is no point looking at t_fp_control_saved here. 
10450b57cec5SDimitry Andric // If it is TRUE, we still have to update the values if they are different 10460b57cec5SDimitry Andric // from those we now have. If it is FALSE we didn't save anything yet, but 10470b57cec5SDimitry Andric // our objective is the same. We have to ensure that the values in the team 10480b57cec5SDimitry Andric // are the same as those we have. 10490b57cec5SDimitry Andric // So, this code achieves what we need whether or not t_fp_control_saved is 10500b57cec5SDimitry Andric // true. By checking whether the value needs updating we avoid unnecessary 10510b57cec5SDimitry Andric // writes that would put the cache-line into a written state, causing all 10520b57cec5SDimitry Andric // threads in the team to have to read it again. 10530b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); 10540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); 10550b57cec5SDimitry Andric // Although we don't use this value, other code in the runtime wants to know 10560b57cec5SDimitry Andric // whether it should restore them. So we must ensure it is correct. 10570b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); 10580b57cec5SDimitry Andric } else { 10590b57cec5SDimitry Andric // Similarly here. Don't write to this cache-line in the team structure 10600b57cec5SDimitry Andric // unless we have to. 10610b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric } 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric // Do the opposite, setting the hardware registers to the updated values from 10660b57cec5SDimitry Andric // the team. 10670b57cec5SDimitry Andric inline static void updateHWFPControl(kmp_team_t *team) { 10680b57cec5SDimitry Andric if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) { 10690b57cec5SDimitry Andric // Only reset the fp control regs if they have been changed in the team. 
10700b57cec5SDimitry Andric // the parallel region that we are exiting. 10710b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 10720b57cec5SDimitry Andric kmp_uint32 mxcsr; 10730b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 10740b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 10750b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 10760b57cec5SDimitry Andric 10770b57cec5SDimitry Andric if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) { 10780b57cec5SDimitry Andric __kmp_clear_x87_fpu_status_word(); 10790b57cec5SDimitry Andric __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word); 10800b57cec5SDimitry Andric } 10810b57cec5SDimitry Andric 10820b57cec5SDimitry Andric if (team->t.t_mxcsr != mxcsr) { 10830b57cec5SDimitry Andric __kmp_load_mxcsr(&team->t.t_mxcsr); 10840b57cec5SDimitry Andric } 10850b57cec5SDimitry Andric } 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric #else 10880b57cec5SDimitry Andric #define propagateFPControl(x) ((void)0) 10890b57cec5SDimitry Andric #define updateHWFPControl(x) ((void)0) 10900b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, 10930b57cec5SDimitry Andric int realloc); // forward declaration 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric /* Run a parallel region that has been serialized, so runs only in a team of the 1096fe6060f1SDimitry Andric single primary thread. 
*/ 10970b57cec5SDimitry Andric void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 10980b57cec5SDimitry Andric kmp_info_t *this_thr; 10990b57cec5SDimitry Andric kmp_team_t *serial_team; 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid)); 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric /* Skip all this code for autopar serialized loops since it results in 11040b57cec5SDimitry Andric unacceptable overhead */ 11050b57cec5SDimitry Andric if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 11060b57cec5SDimitry Andric return; 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 11090b57cec5SDimitry Andric __kmp_parallel_initialize(); 11100b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric this_thr = __kmp_threads[global_tid]; 11130b57cec5SDimitry Andric serial_team = this_thr->th.th_serial_team; 11140b57cec5SDimitry Andric 11150b57cec5SDimitry Andric /* utilize the serialized team held by this thread */ 11160b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team); 11170b57cec5SDimitry Andric KMP_MB(); 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 11200b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 11210b57cec5SDimitry Andric this_thr->th.th_task_team == 11220b57cec5SDimitry Andric this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); 11230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == 11240b57cec5SDimitry Andric NULL); 11250b57cec5SDimitry Andric KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / " 11260b57cec5SDimitry Andric "team %p, new task_team = NULL\n", 11270b57cec5SDimitry Andric global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 11280b57cec5SDimitry Andric this_thr->th.th_task_team = NULL; 11290b57cec5SDimitry Andric } 
11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; 11320b57cec5SDimitry Andric if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 11330b57cec5SDimitry Andric proc_bind = proc_bind_false; 11340b57cec5SDimitry Andric } else if (proc_bind == proc_bind_default) { 11350b57cec5SDimitry Andric // No proc_bind clause was specified, so use the current value 11360b57cec5SDimitry Andric // of proc-bind-var for this parallel region. 11370b57cec5SDimitry Andric proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; 11380b57cec5SDimitry Andric } 11390b57cec5SDimitry Andric // Reset for next parallel region 11400b57cec5SDimitry Andric this_thr->th.th_set_proc_bind = proc_bind_default; 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric #if OMPT_SUPPORT 11430b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 11440b57cec5SDimitry Andric void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 11450b57cec5SDimitry Andric if (ompt_enabled.enabled && 11460b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric ompt_task_info_t *parent_task_info; 11490b57cec5SDimitry Andric parent_task_info = OMPT_CUR_TASK_INFO(this_thr); 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 11520b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 11530b57cec5SDimitry Andric int team_size = 1; 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 11560b57cec5SDimitry Andric &(parent_task_info->task_data), &(parent_task_info->frame), 1157489b1cf2SDimitry Andric &ompt_parallel_data, team_size, 1158489b1cf2SDimitry Andric ompt_parallel_invoker_program | ompt_parallel_team, codeptr); 11590b57cec5SDimitry Andric } 11600b57cec5SDimitry Andric } 
11610b57cec5SDimitry Andric #endif // OMPT_SUPPORT 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric if (this_thr->th.th_team != serial_team) { 11640b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 11650b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric if (serial_team->t.t_serialized) { 11680b57cec5SDimitry Andric /* this serial team was already used 11690b57cec5SDimitry Andric TODO increase performance by making this locks more specific */ 11700b57cec5SDimitry Andric kmp_team_t *new_team; 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 11730b57cec5SDimitry Andric 11740b57cec5SDimitry Andric new_team = 11750b57cec5SDimitry Andric __kmp_allocate_team(this_thr->th.th_root, 1, 1, 11760b57cec5SDimitry Andric #if OMPT_SUPPORT 11770b57cec5SDimitry Andric ompt_parallel_data, 11780b57cec5SDimitry Andric #endif 11790b57cec5SDimitry Andric proc_bind, &this_thr->th.th_current_task->td_icvs, 11800b57cec5SDimitry Andric 0 USE_NESTED_HOT_ARG(NULL)); 11810b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 11820b57cec5SDimitry Andric KMP_ASSERT(new_team); 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric /* setup new serialized team and install it */ 11850b57cec5SDimitry Andric new_team->t.t_threads[0] = this_thr; 11860b57cec5SDimitry Andric new_team->t.t_parent = this_thr->th.th_team; 11870b57cec5SDimitry Andric serial_team = new_team; 11880b57cec5SDimitry Andric this_thr->th.th_serial_team = serial_team; 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric KF_TRACE( 11910b57cec5SDimitry Andric 10, 11920b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", 11930b57cec5SDimitry Andric global_tid, serial_team)); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric /* TODO the above breaks the requirement that if we run out of resources, 
11960b57cec5SDimitry Andric then we can still guarantee that serialized teams are ok, since we may 11970b57cec5SDimitry Andric need to allocate a new one */ 11980b57cec5SDimitry Andric } else { 11990b57cec5SDimitry Andric KF_TRACE( 12000b57cec5SDimitry Andric 10, 12010b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", 12020b57cec5SDimitry Andric global_tid, serial_team)); 12030b57cec5SDimitry Andric } 12040b57cec5SDimitry Andric 12050b57cec5SDimitry Andric /* we have to initialize this serial team */ 12060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 12070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 12080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team); 12090b57cec5SDimitry Andric serial_team->t.t_ident = loc; 12100b57cec5SDimitry Andric serial_team->t.t_serialized = 1; 12110b57cec5SDimitry Andric serial_team->t.t_nproc = 1; 12120b57cec5SDimitry Andric serial_team->t.t_parent = this_thr->th.th_team; 12130b57cec5SDimitry Andric serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched; 12140b57cec5SDimitry Andric this_thr->th.th_team = serial_team; 12150b57cec5SDimitry Andric serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; 12160b57cec5SDimitry Andric 1217349cc55cSDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid, 12180b57cec5SDimitry Andric this_thr->th.th_current_task)); 12190b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); 12200b57cec5SDimitry Andric this_thr->th.th_current_task->td_flags.executing = 0; 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric __kmp_push_current_task_to_thread(this_thr, serial_team, 0); 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric /* TODO: GEH: do ICVs work for nested serialized teams? 
Don't we need an 12250b57cec5SDimitry Andric implicit task for each serialized task represented by 12260b57cec5SDimitry Andric team->t.t_serialized? */ 12270b57cec5SDimitry Andric copy_icvs(&this_thr->th.th_current_task->td_icvs, 12280b57cec5SDimitry Andric &this_thr->th.th_current_task->td_parent->td_icvs); 12290b57cec5SDimitry Andric 12300b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 12310b57cec5SDimitry Andric // level 12320b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 12330b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 12340b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric if (__kmp_nested_proc_bind.used && 12380b57cec5SDimitry Andric (level + 1 < __kmp_nested_proc_bind.used)) { 12390b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.proc_bind = 12400b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[level + 1]; 12410b57cec5SDimitry Andric } 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric #if USE_DEBUGGER 12440b57cec5SDimitry Andric serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. 
12450b57cec5SDimitry Andric #endif 12460b57cec5SDimitry Andric this_thr->th.th_info.ds.ds_tid = 0; 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric /* set thread cache values */ 12490b57cec5SDimitry Andric this_thr->th.th_team_nproc = 1; 12500b57cec5SDimitry Andric this_thr->th.th_team_master = this_thr; 12510b57cec5SDimitry Andric this_thr->th.th_team_serialized = 1; 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; 12540b57cec5SDimitry Andric serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; 12550b57cec5SDimitry Andric serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save 12560b57cec5SDimitry Andric 12570b57cec5SDimitry Andric propagateFPControl(serial_team); 12580b57cec5SDimitry Andric 12590b57cec5SDimitry Andric /* check if we need to allocate dispatch buffers stack */ 12600b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 12610b57cec5SDimitry Andric if (!serial_team->t.t_dispatch->th_disp_buffer) { 12620b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = 12630b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 12640b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 12650b57cec5SDimitry Andric } 12660b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric KMP_MB(); 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric } else { 12710b57cec5SDimitry Andric /* this serialized team is already being used, 12720b57cec5SDimitry Andric * that's fine, just add another nested level */ 12730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 12740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 12750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 12760b57cec5SDimitry Andric ++serial_team->t.t_serialized; 12770b57cec5SDimitry Andric 
this_thr->th.th_team_serialized = serial_team->t.t_serialized; 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 12800b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 12810b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 12820b57cec5SDimitry Andric // level 12830b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 12840b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 12850b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 12860b57cec5SDimitry Andric } 12870b57cec5SDimitry Andric serial_team->t.t_level++; 12880b57cec5SDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " 12890b57cec5SDimitry Andric "of serial team %p to %d\n", 12900b57cec5SDimitry Andric global_tid, serial_team, serial_team->t.t_level)); 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric /* allocate/push dispatch buffers stack */ 12930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 12940b57cec5SDimitry Andric { 12950b57cec5SDimitry Andric dispatch_private_info_t *disp_buffer = 12960b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 12970b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 12980b57cec5SDimitry Andric disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; 12990b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 13020b57cec5SDimitry Andric 13030b57cec5SDimitry Andric KMP_MB(); 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); 13060b57cec5SDimitry Andric 13070b57cec5SDimitry Andric // Perform the display affinity functionality for 13080b57cec5SDimitry Andric // serialized parallel regions 
13090b57cec5SDimitry Andric if (__kmp_display_affinity) { 13100b57cec5SDimitry Andric if (this_thr->th.th_prev_level != serial_team->t.t_level || 13110b57cec5SDimitry Andric this_thr->th.th_prev_num_threads != 1) { 13120b57cec5SDimitry Andric // NULL means use the affinity-format-var ICV 13130b57cec5SDimitry Andric __kmp_aux_display_affinity(global_tid, NULL); 13140b57cec5SDimitry Andric this_thr->th.th_prev_level = serial_team->t.t_level; 13150b57cec5SDimitry Andric this_thr->th.th_prev_num_threads = 1; 13160b57cec5SDimitry Andric } 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric if (__kmp_env_consistency_check) 13200b57cec5SDimitry Andric __kmp_push_parallel(global_tid, NULL); 13210b57cec5SDimitry Andric #if OMPT_SUPPORT 13220b57cec5SDimitry Andric serial_team->t.ompt_team_info.master_return_address = codeptr; 13230b57cec5SDimitry Andric if (ompt_enabled.enabled && 13240b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 1325fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = 1326fe6060f1SDimitry Andric OMPT_GET_FRAME_ADDRESS(0); 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 13290b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, 13300b57cec5SDimitry Andric &ompt_parallel_data, codeptr); 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); 13330b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric /* OMPT implicit task begin */ 13360b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 13370b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 13380b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), 1339fe6060f1SDimitry Andric OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), 1340fe6060f1SDimitry Andric ompt_task_implicit); // TODO: Can this be ompt_task_initial? 1341fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->thread_num = 1342fe6060f1SDimitry Andric __kmp_tid_from_gtid(global_tid); 13430b57cec5SDimitry Andric } 13440b57cec5SDimitry Andric 13450b57cec5SDimitry Andric /* OMPT state */ 13460b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 1347fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = 1348fe6060f1SDimitry Andric OMPT_GET_FRAME_ADDRESS(0); 13490b57cec5SDimitry Andric } 13500b57cec5SDimitry Andric #endif 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric /* most of the work for a fork */ 13540b57cec5SDimitry Andric /* return true if we really went parallel, false if serialized */ 13550b57cec5SDimitry Andric int __kmp_fork_call(ident_t *loc, int gtid, 13560b57cec5SDimitry Andric enum fork_context_e call_context, // Intel, GNU, ... 
13570b57cec5SDimitry Andric kmp_int32 argc, microtask_t microtask, launch_t invoker, 135816794618SDimitry Andric kmp_va_list ap) { 13590b57cec5SDimitry Andric void **argv; 13600b57cec5SDimitry Andric int i; 13610b57cec5SDimitry Andric int master_tid; 13620b57cec5SDimitry Andric int master_this_cons; 13630b57cec5SDimitry Andric kmp_team_t *team; 13640b57cec5SDimitry Andric kmp_team_t *parent_team; 13650b57cec5SDimitry Andric kmp_info_t *master_th; 13660b57cec5SDimitry Andric kmp_root_t *root; 13670b57cec5SDimitry Andric int nthreads; 13680b57cec5SDimitry Andric int master_active; 13690b57cec5SDimitry Andric int master_set_numthreads; 13700b57cec5SDimitry Andric int level; 13710b57cec5SDimitry Andric int active_level; 13720b57cec5SDimitry Andric int teams_level; 13730b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 13740b57cec5SDimitry Andric kmp_hot_team_ptr_t **p_hot_teams; 13750b57cec5SDimitry Andric #endif 13760b57cec5SDimitry Andric { // KMP_TIME_BLOCK 13770b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 13780b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 13810b57cec5SDimitry Andric if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 13820b57cec5SDimitry Andric /* Some systems prefer the stack for the root thread(s) to start with */ 13830b57cec5SDimitry Andric /* some gap from the parent stack to prevent false sharing. 
*/ 13840b57cec5SDimitry Andric void *dummy = KMP_ALLOCA(__kmp_stkpadding); 13850b57cec5SDimitry Andric /* These 2 lines below are so this does not get optimized out */ 13860b57cec5SDimitry Andric if (__kmp_stkpadding > KMP_MAX_STKPADDING) 13870b57cec5SDimitry Andric __kmp_stkpadding += (short)((kmp_int64)dummy); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric /* initialize if needed */ 13910b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 13920b57cec5SDimitry Andric __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 13930b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 13940b57cec5SDimitry Andric __kmp_parallel_initialize(); 13950b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric /* setup current data */ 13980b57cec5SDimitry Andric master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with 13990b57cec5SDimitry Andric // shutdown 14000b57cec5SDimitry Andric parent_team = master_th->th.th_team; 14010b57cec5SDimitry Andric master_tid = master_th->th.th_info.ds.ds_tid; 14020b57cec5SDimitry Andric master_this_cons = master_th->th.th_local.this_construct; 14030b57cec5SDimitry Andric root = master_th->th.th_root; 14040b57cec5SDimitry Andric master_active = root->r.r_active; 14050b57cec5SDimitry Andric master_set_numthreads = master_th->th.th_set_nproc; 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric #if OMPT_SUPPORT 14080b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 14090b57cec5SDimitry Andric ompt_data_t *parent_task_data; 14100b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 14110b57cec5SDimitry Andric ompt_data_t *implicit_task_data; 14120b57cec5SDimitry Andric void *return_address = NULL; 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14150b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, 14160b57cec5SDimitry Andric NULL, 
NULL); 14170b57cec5SDimitry Andric return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric #endif 14200b57cec5SDimitry Andric 1421fe6060f1SDimitry Andric // Assign affinity to root thread if it hasn't happened yet 1422fe6060f1SDimitry Andric __kmp_assign_root_init_mask(); 1423fe6060f1SDimitry Andric 14240b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 14250b57cec5SDimitry Andric level = parent_team->t.t_level; 14260b57cec5SDimitry Andric // used to launch non-serial teams even if nested is not allowed 14270b57cec5SDimitry Andric active_level = parent_team->t.t_active_level; 14280b57cec5SDimitry Andric // needed to check nesting inside the teams 14290b57cec5SDimitry Andric teams_level = master_th->th.th_teams_level; 14300b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 14310b57cec5SDimitry Andric p_hot_teams = &master_th->th.th_hot_teams; 14320b57cec5SDimitry Andric if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 14330b57cec5SDimitry Andric *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 14340b57cec5SDimitry Andric sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 14350b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 14360b57cec5SDimitry Andric // it is either actual or not needed (when active_level > 0) 14370b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team_nth = 1; 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric #endif 14400b57cec5SDimitry Andric 14410b57cec5SDimitry Andric #if OMPT_SUPPORT 14420b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14430b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 14440b57cec5SDimitry Andric int team_size = master_set_numthreads 14450b57cec5SDimitry Andric ? 
master_set_numthreads 14460b57cec5SDimitry Andric : get__nproc_2(parent_team, master_tid); 1447489b1cf2SDimitry Andric int flags = OMPT_INVOKER(call_context) | 1448489b1cf2SDimitry Andric ((microtask == (microtask_t)__kmp_teams_master) 1449489b1cf2SDimitry Andric ? ompt_parallel_league 1450489b1cf2SDimitry Andric : ompt_parallel_team); 14510b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 1452489b1cf2SDimitry Andric parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, 1453489b1cf2SDimitry Andric return_address); 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 14560b57cec5SDimitry Andric } 14570b57cec5SDimitry Andric #endif 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric master_th->th.th_ident = loc; 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric if (master_th->th.th_teams_microtask && ap && 14620b57cec5SDimitry Andric microtask != (microtask_t)__kmp_teams_master && level == teams_level) { 14630b57cec5SDimitry Andric // AC: This is start of parallel that is nested inside teams construct. 14640b57cec5SDimitry Andric // The team is actual (hot), all workers are ready at the fork barrier. 14650b57cec5SDimitry Andric // No lock needed to initialize the team a bit, then free workers. 
14660b57cec5SDimitry Andric parent_team->t.t_ident = loc; 14670b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, parent_team, TRUE); 14680b57cec5SDimitry Andric parent_team->t.t_argc = argc; 14690b57cec5SDimitry Andric argv = (void **)parent_team->t.t_argv; 14700b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 147116794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 14720b57cec5SDimitry Andric // Increment our nested depth levels, but not increase the serialization 14730b57cec5SDimitry Andric if (parent_team == master_th->th.th_serial_team) { 14740b57cec5SDimitry Andric // AC: we are in serialized parallel 14750b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 14760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1477489b1cf2SDimitry Andric 1478e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) { 1479e8d8bef9SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1480e8d8bef9SDimitry Andric // correctly, will restore at join time 1481e8d8bef9SDimitry Andric parent_team->t.t_serialized--; 1482e8d8bef9SDimitry Andric return TRUE; 1483e8d8bef9SDimitry Andric } 1484e8d8bef9SDimitry Andric 1485fe6060f1SDimitry Andric #if OMPD_SUPPORT 1486fe6060f1SDimitry Andric parent_team->t.t_pkfn = microtask; 1487fe6060f1SDimitry Andric #endif 1488fe6060f1SDimitry Andric 14890b57cec5SDimitry Andric #if OMPT_SUPPORT 14900b57cec5SDimitry Andric void *dummy; 1491489b1cf2SDimitry Andric void **exit_frame_p; 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14960b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 14970b57cec5SDimitry Andric &ompt_parallel_data, return_address); 1498489b1cf2SDimitry Andric exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric 
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 15010b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric /* OMPT implicit task begin */ 15040b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 15050b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1506fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1507fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1508489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1509489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1510489b1cf2SDimitry Andric implicit_task_data, 1, 1511489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15120b57cec5SDimitry Andric } 15130b57cec5SDimitry Andric 15140b57cec5SDimitry Andric /* OMPT state */ 15150b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 15160b57cec5SDimitry Andric } else { 1517489b1cf2SDimitry Andric exit_frame_p = &dummy; 15180b57cec5SDimitry Andric } 15190b57cec5SDimitry Andric #endif 1520489b1cf2SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1521489b1cf2SDimitry Andric // correctly, will restore at join time 1522489b1cf2SDimitry Andric parent_team->t.t_serialized--; 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric { 15250b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 15260b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 15270b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 15280b57cec5SDimitry Andric #if OMPT_SUPPORT 15290b57cec5SDimitry Andric , 1530489b1cf2SDimitry Andric exit_frame_p 15310b57cec5SDimitry Andric #endif 15320b57cec5SDimitry Andric ); 15330b57cec5SDimitry Andric } 15340b57cec5SDimitry Andric 15350b57cec5SDimitry Andric #if OMPT_SUPPORT 15360b57cec5SDimitry Andric 
if (ompt_enabled.enabled) { 1537489b1cf2SDimitry Andric *exit_frame_p = NULL; 15380b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; 15390b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 15400b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 15410b57cec5SDimitry Andric ompt_scope_end, NULL, implicit_task_data, 1, 1542489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15430b57cec5SDimitry Andric } 1544489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 15450b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 15460b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 15470b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1548489b1cf2SDimitry Andric &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), 1549489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1550489b1cf2SDimitry Andric return_address); 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 15530b57cec5SDimitry Andric } 15540b57cec5SDimitry Andric #endif 15550b57cec5SDimitry Andric return TRUE; 15560b57cec5SDimitry Andric } 15570b57cec5SDimitry Andric 15580b57cec5SDimitry Andric parent_team->t.t_pkfn = microtask; 15590b57cec5SDimitry Andric parent_team->t.t_invoke = invoker; 15600b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 15610b57cec5SDimitry Andric parent_team->t.t_active_level++; 15620b57cec5SDimitry Andric parent_team->t.t_level++; 15630b57cec5SDimitry Andric parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save 15640b57cec5SDimitry Andric 1565489b1cf2SDimitry Andric #if OMPT_SUPPORT 1566489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1567489b1cf2SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1568489b1cf2SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, 
gtid, 1569489b1cf2SDimitry Andric &ompt_parallel_data, return_address); 1570489b1cf2SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); 1571489b1cf2SDimitry Andric } 1572489b1cf2SDimitry Andric #endif 1573489b1cf2SDimitry Andric 15740b57cec5SDimitry Andric /* Change number of threads in the team if requested */ 15750b57cec5SDimitry Andric if (master_set_numthreads) { // The parallel has num_threads clause 1576349cc55cSDimitry Andric if (master_set_numthreads <= master_th->th.th_teams_size.nth) { 15770b57cec5SDimitry Andric // AC: only can reduce number of threads dynamically, can't increase 15780b57cec5SDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1579349cc55cSDimitry Andric // NOTE: if using distributed barrier, we need to run this code block 1580349cc55cSDimitry Andric // even when the team size appears not to have changed from the max. 1581349cc55cSDimitry Andric int old_proc = master_th->th.th_teams_size.nth; 1582349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == 1583349cc55cSDimitry Andric bp_dist_bar) { 1584349cc55cSDimitry Andric __kmp_resize_dist_barrier(parent_team, old_proc, 1585349cc55cSDimitry Andric master_set_numthreads); 1586349cc55cSDimitry Andric __kmp_add_threads_to_team(parent_team, master_set_numthreads); 1587349cc55cSDimitry Andric } 15880b57cec5SDimitry Andric parent_team->t.t_nproc = master_set_numthreads; 15890b57cec5SDimitry Andric for (i = 0; i < master_set_numthreads; ++i) { 15900b57cec5SDimitry Andric other_threads[i]->th.th_team_nproc = master_set_numthreads; 15910b57cec5SDimitry Andric } 15920b57cec5SDimitry Andric } 1593349cc55cSDimitry Andric // Keep extra threads hot in the team for possible next parallels 15940b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 15950b57cec5SDimitry Andric } 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric #if USE_DEBUGGER 15980b57cec5SDimitry Andric if (__kmp_debugging) { // Let debugger override number of 
threads. 15990b57cec5SDimitry Andric int nth = __kmp_omp_num_threads(loc); 16000b57cec5SDimitry Andric if (nth > 0) { // 0 means debugger doesn't want to change num threads 16010b57cec5SDimitry Andric master_set_numthreads = nth; 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric } 16040b57cec5SDimitry Andric #endif 16050b57cec5SDimitry Andric 1606349cc55cSDimitry Andric // Figure out the proc_bind policy for the nested parallel within teams 1607349cc55cSDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1608349cc55cSDimitry Andric // proc_bind_default means don't update 1609349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 1610349cc55cSDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1611349cc55cSDimitry Andric proc_bind = proc_bind_false; 1612349cc55cSDimitry Andric } else { 1613349cc55cSDimitry Andric // No proc_bind clause specified; use current proc-bind-var 1614349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 1615349cc55cSDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1616349cc55cSDimitry Andric } 1617349cc55cSDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel 1618349cc55cSDimitry Andric clause. 1619349cc55cSDimitry Andric This overrides proc-bind-var for this parallel region, but does not 1620349cc55cSDimitry Andric change proc-bind-var. */ 1621349cc55cSDimitry Andric // Figure the value of proc-bind-var for the child threads. 
1622349cc55cSDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 1623349cc55cSDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 1624349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 1625349cc55cSDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1626349cc55cSDimitry Andric } 1627349cc55cSDimitry Andric } 1628349cc55cSDimitry Andric KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind); 1629349cc55cSDimitry Andric // Need to change the bind-var ICV to correct value for each implicit task 1630349cc55cSDimitry Andric if (proc_bind_icv != proc_bind_default && 1631349cc55cSDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) { 1632349cc55cSDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 1633349cc55cSDimitry Andric for (i = 0; i < master_th->th.th_team_nproc; ++i) { 1634349cc55cSDimitry Andric other_threads[i]->th.th_current_task->td_icvs.proc_bind = 1635349cc55cSDimitry Andric proc_bind_icv; 1636349cc55cSDimitry Andric } 1637349cc55cSDimitry Andric } 1638349cc55cSDimitry Andric // Reset for next parallel region 1639349cc55cSDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 1640349cc55cSDimitry Andric 1641e8d8bef9SDimitry Andric #if USE_ITT_BUILD && USE_ITT_NOTIFY 1642e8d8bef9SDimitry Andric if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) || 1643e8d8bef9SDimitry Andric KMP_ITT_DEBUG) && 1644e8d8bef9SDimitry Andric __kmp_forkjoin_frames_mode == 3 && 1645e8d8bef9SDimitry Andric parent_team->t.t_active_level == 1 // only report frames at level 1 1646e8d8bef9SDimitry Andric && master_th->th.th_teams_size.nteams == 1) { 1647e8d8bef9SDimitry Andric kmp_uint64 tmp_time = __itt_get_timestamp(); 1648e8d8bef9SDimitry Andric master_th->th.th_frame_time = tmp_time; 1649e8d8bef9SDimitry Andric parent_team->t.t_region_time = tmp_time; 1650e8d8bef9SDimitry Andric } 1651e8d8bef9SDimitry Andric if (__itt_stack_caller_create_ptr) 
{ 1652fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 1653e8d8bef9SDimitry Andric // create new stack stitching id before entering fork barrier 1654e8d8bef9SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 1655e8d8bef9SDimitry Andric } 1656e8d8bef9SDimitry Andric #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ 1657349cc55cSDimitry Andric #if KMP_AFFINITY_SUPPORTED 1658349cc55cSDimitry Andric __kmp_partition_places(parent_team); 1659349cc55cSDimitry Andric #endif 1660e8d8bef9SDimitry Andric 16610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " 16620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16630b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16640b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, parent_team); 16650b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " 16660b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16670b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16680b57cec5SDimitry Andric 1669e8d8bef9SDimitry Andric if (call_context == fork_context_gnu) 1670e8d8bef9SDimitry Andric return TRUE; 1671e8d8bef9SDimitry Andric 1672fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 16730b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 16740b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16750b57cec5SDimitry Andric 16760b57cec5SDimitry Andric if (!parent_team->t.t_invoke(gtid)) { 1677fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 16780b57cec5SDimitry Andric } 16790b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 16800b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16810b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 16840b57cec5SDimitry Andric 16850b57cec5SDimitry Andric return TRUE; 16860b57cec5SDimitry Andric } // Parallel closely nested in teams construct 16870b57cec5SDimitry Andric 16880b57cec5SDimitry Andric #if KMP_DEBUG 16890b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 16900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 16910b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric #endif 16940b57cec5SDimitry Andric 1695349cc55cSDimitry Andric // Need this to happen before we determine the number of threads, not while 1696349cc55cSDimitry Andric // we are allocating the team 1697349cc55cSDimitry Andric //__kmp_push_current_task_to_thread(master_th, parent_team, 0); 1698fe6060f1SDimitry Andric int enter_teams = 0; 16990b57cec5SDimitry Andric if (parent_team->t.t_active_level >= 17000b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels) { 17010b57cec5SDimitry Andric nthreads = 1; 17020b57cec5SDimitry Andric } else { 1703fe6060f1SDimitry Andric enter_teams = ((ap == NULL && active_level == 0) || 17040b57cec5SDimitry Andric (ap && teams_level > 0 && teams_level == level)); 1705349cc55cSDimitry Andric nthreads = master_set_numthreads 17060b57cec5SDimitry Andric ? master_set_numthreads 1707349cc55cSDimitry Andric // TODO: get nproc directly from current task 1708349cc55cSDimitry Andric : get__nproc_2(parent_team, master_tid); 17090b57cec5SDimitry Andric // Check if we need to take forkjoin lock? (no need for serialized 17100b57cec5SDimitry Andric // parallel out of teams construct). This code moved here from 17110b57cec5SDimitry Andric // __kmp_reserve_threads() to speedup nested serialized parallels. 
17120b57cec5SDimitry Andric if (nthreads > 1) { 17130b57cec5SDimitry Andric if ((get__max_active_levels(master_th) == 1 && 17140b57cec5SDimitry Andric (root->r.r_in_parallel && !enter_teams)) || 17150b57cec5SDimitry Andric (__kmp_library == library_serial)) { 17160b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" 17170b57cec5SDimitry Andric " threads\n", 17180b57cec5SDimitry Andric gtid, nthreads)); 17190b57cec5SDimitry Andric nthreads = 1; 17200b57cec5SDimitry Andric } 17210b57cec5SDimitry Andric } 17220b57cec5SDimitry Andric if (nthreads > 1) { 17230b57cec5SDimitry Andric /* determine how many new threads we can use */ 17240b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 17250b57cec5SDimitry Andric /* AC: If we execute teams from parallel region (on host), then teams 17260b57cec5SDimitry Andric should be created but each can only have 1 thread if nesting is 17270b57cec5SDimitry Andric disabled. If teams called from serial region, then teams and their 17280b57cec5SDimitry Andric threads should be created regardless of the nesting setting. 
*/ 17290b57cec5SDimitry Andric nthreads = __kmp_reserve_threads(root, parent_team, master_tid, 17300b57cec5SDimitry Andric nthreads, enter_teams); 17310b57cec5SDimitry Andric if (nthreads == 1) { 17320b57cec5SDimitry Andric // Free lock for single thread execution here; for multi-thread 17330b57cec5SDimitry Andric // execution it will be freed later after team of threads created 17340b57cec5SDimitry Andric // and initialized 17350b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 17360b57cec5SDimitry Andric } 17370b57cec5SDimitry Andric } 17380b57cec5SDimitry Andric } 17390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nthreads > 0); 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // If we temporarily changed the set number of threads then restore it now 17420b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric /* create a serialized parallel region? */ 17450b57cec5SDimitry Andric if (nthreads == 1) { 17460b57cec5SDimitry Andric /* josh todo: hypothetical question: what do we do for OS X*? 
*/ 17470b57cec5SDimitry Andric #if KMP_OS_LINUX && \ 17480b57cec5SDimitry Andric (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 17490b57cec5SDimitry Andric void *args[argc]; 17500b57cec5SDimitry Andric #else 17510b57cec5SDimitry Andric void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 17520b57cec5SDimitry Andric #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 17530b57cec5SDimitry Andric KMP_ARCH_AARCH64) */ 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric KA_TRACE(20, 17560b57cec5SDimitry Andric ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 17570b57cec5SDimitry Andric 17580b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 17590b57cec5SDimitry Andric 1760fe6060f1SDimitry Andric #if OMPD_SUPPORT 1761fe6060f1SDimitry Andric master_th->th.th_serial_team->t.t_pkfn = microtask; 1762fe6060f1SDimitry Andric #endif 1763fe6060f1SDimitry Andric 17640b57cec5SDimitry Andric if (call_context == fork_context_intel) { 17650b57cec5SDimitry Andric /* TODO this sucks, use the compiler itself to pass args! 
:) */ 17660b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_ident = loc; 17670b57cec5SDimitry Andric if (!ap) { 17680b57cec5SDimitry Andric // revert change made in __kmpc_serialized_parallel() 17690b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_level--; 17700b57cec5SDimitry Andric // Get args from parent team for teams construct 17710b57cec5SDimitry Andric 17720b57cec5SDimitry Andric #if OMPT_SUPPORT 17730b57cec5SDimitry Andric void *dummy; 1774489b1cf2SDimitry Andric void **exit_frame_p; 17750b57cec5SDimitry Andric ompt_task_info_t *task_info; 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric if (ompt_enabled.enabled) { 17800b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 17810b57cec5SDimitry Andric &ompt_parallel_data, return_address); 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 17840b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 17850b57cec5SDimitry Andric 17860b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1787489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 17880b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1789fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1790fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 1791489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1792489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1793489b1cf2SDimitry Andric &(task_info->task_data), 1, 1794489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1795489b1cf2SDimitry Andric ompt_task_implicit); 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric /* OMPT state */ 17990b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 18000b57cec5SDimitry Andric } else { 1801489b1cf2SDimitry Andric exit_frame_p = &dummy; 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric #endif 18040b57cec5SDimitry Andric 18050b57cec5SDimitry Andric { 18060b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 18070b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 18080b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, 18090b57cec5SDimitry Andric parent_team->t.t_argv 18100b57cec5SDimitry Andric #if OMPT_SUPPORT 18110b57cec5SDimitry Andric , 1812489b1cf2SDimitry Andric exit_frame_p 18130b57cec5SDimitry Andric #endif 18140b57cec5SDimitry Andric ); 18150b57cec5SDimitry Andric } 18160b57cec5SDimitry Andric 18170b57cec5SDimitry Andric #if OMPT_SUPPORT 18180b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1819489b1cf2SDimitry Andric *exit_frame_p = NULL; 18200b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 18210b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 18220b57cec5SDimitry Andric 
ompt_scope_end, NULL, &(task_info->task_data), 1, 1823489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1824489b1cf2SDimitry Andric ompt_task_implicit); 18250b57cec5SDimitry Andric } 1826489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 18270b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 18280b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 18290b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1830489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1831489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1832489b1cf2SDimitry Andric return_address); 18330b57cec5SDimitry Andric } 18340b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric #endif 18370b57cec5SDimitry Andric } else if (microtask == (microtask_t)__kmp_teams_master) { 18380b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_team == 18390b57cec5SDimitry Andric master_th->th.th_serial_team); 18400b57cec5SDimitry Andric team = master_th->th.th_team; 18410b57cec5SDimitry Andric // team->t.t_pkfn = microtask; 18420b57cec5SDimitry Andric team->t.t_invoke = invoker; 18430b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 18440b57cec5SDimitry Andric team->t.t_argc = argc; 18450b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 18460b57cec5SDimitry Andric if (ap) { 18470b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 184816794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18490b57cec5SDimitry Andric } else { 18500b57cec5SDimitry Andric for (i = 0; i < argc; ++i) 18510b57cec5SDimitry Andric // Get args from parent team for teams construct 18520b57cec5SDimitry Andric argv[i] = parent_team->t.t_argv[i]; 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric // AC: revert change made in __kmpc_serialized_parallel() 18550b57cec5SDimitry Andric 
// because initial code in teams should have level=0 18560b57cec5SDimitry Andric team->t.t_level--; 18570b57cec5SDimitry Andric // AC: call special invoker for outer "parallel" of teams construct 18580b57cec5SDimitry Andric invoker(gtid); 1859489b1cf2SDimitry Andric #if OMPT_SUPPORT 1860489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1861489b1cf2SDimitry Andric ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); 1862489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1863489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1864489b1cf2SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 0, 1865489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); 1866489b1cf2SDimitry Andric } 1867489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 1868489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1869489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1870489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_league, 1871489b1cf2SDimitry Andric return_address); 1872489b1cf2SDimitry Andric } 1873489b1cf2SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 1874489b1cf2SDimitry Andric } 1875489b1cf2SDimitry Andric #endif 18760b57cec5SDimitry Andric } else { 18770b57cec5SDimitry Andric argv = args; 18780b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 187916794618SDimitry Andric *argv++ = va_arg(kmp_va_deref(ap), void *); 18800b57cec5SDimitry Andric KMP_MB(); 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric #if OMPT_SUPPORT 18830b57cec5SDimitry Andric void *dummy; 1884489b1cf2SDimitry Andric void **exit_frame_p; 18850b57cec5SDimitry Andric ompt_task_info_t *task_info; 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 18880b57cec5SDimitry Andric 18890b57cec5SDimitry Andric if (ompt_enabled.enabled) { 18900b57cec5SDimitry Andric 
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 18910b57cec5SDimitry Andric &ompt_parallel_data, return_address); 18920b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 18930b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 18940b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1895489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 18960b57cec5SDimitry Andric 18970b57cec5SDimitry Andric /* OMPT implicit task begin */ 18980b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 18990b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19000b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19010b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1902489b1cf2SDimitry Andric implicit_task_data, 1, __kmp_tid_from_gtid(gtid), 1903489b1cf2SDimitry Andric ompt_task_implicit); 1904fe6060f1SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num = 1905fe6060f1SDimitry Andric __kmp_tid_from_gtid(gtid); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric /* OMPT state */ 19090b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 19100b57cec5SDimitry Andric } else { 1911489b1cf2SDimitry Andric exit_frame_p = &dummy; 19120b57cec5SDimitry Andric } 19130b57cec5SDimitry Andric #endif 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric { 19160b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 19170b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 19180b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, args 19190b57cec5SDimitry Andric #if OMPT_SUPPORT 19200b57cec5SDimitry Andric , 1921489b1cf2SDimitry Andric exit_frame_p 19220b57cec5SDimitry Andric #endif 19230b57cec5SDimitry Andric ); 19240b57cec5SDimitry Andric } 19250b57cec5SDimitry Andric 19260b57cec5SDimitry Andric 
#if OMPT_SUPPORT 19270b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1928489b1cf2SDimitry Andric *exit_frame_p = NULL; 19290b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 19300b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19310b57cec5SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 1, 1932489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1933489b1cf2SDimitry Andric ompt_task_implicit); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 19370b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 19380b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 19390b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 19400b57cec5SDimitry Andric &ompt_parallel_data, parent_task_data, 1941489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1942489b1cf2SDimitry Andric return_address); 19430b57cec5SDimitry Andric } 19440b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric #endif 19470b57cec5SDimitry Andric } 19480b57cec5SDimitry Andric } else if (call_context == fork_context_gnu) { 19490b57cec5SDimitry Andric #if OMPT_SUPPORT 19500b57cec5SDimitry Andric ompt_lw_taskteam_t lwt; 19510b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, 19520b57cec5SDimitry Andric return_address); 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric lwt.ompt_task_info.frame.exit_frame = ompt_data_none; 19550b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lwt, master_th, 1); 19560b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 19570b57cec5SDimitry Andric #endif 19580b57cec5SDimitry Andric 19590b57cec5SDimitry Andric // we were called from GNU native code 19600b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19610b57cec5SDimitry Andric return FALSE; 19620b57cec5SDimitry Andric } else { 19630b57cec5SDimitry Andric KMP_ASSERT2(call_context < fork_context_last, 19640b57cec5SDimitry Andric "__kmp_fork_call: unknown fork_context parameter"); 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19680b57cec5SDimitry Andric KMP_MB(); 19690b57cec5SDimitry Andric return FALSE; 19700b57cec5SDimitry Andric } // if (nthreads == 1) 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric // GEH: only modify the executing flag in the case when not serialized 19730b57cec5SDimitry Andric // serialized case is handled in kmpc_serialized_parallel 19740b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 19750b57cec5SDimitry Andric "curtask=%p, curtask_max_aclevel=%d\n", 19760b57cec5SDimitry Andric parent_team->t.t_active_level, master_th, 19770b57cec5SDimitry Andric master_th->th.th_current_task, 19780b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)); 19790b57cec5SDimitry Andric // TODO: GEH - cannot do this assertion because root thread not set up as 19800b57cec5SDimitry Andric // executing 19810b57cec5SDimitry Andric // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 19820b57cec5SDimitry Andric master_th->th.th_current_task->td_flags.executing = 0; 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 19850b57cec5SDimitry Andric /* Increment our nested depth level */ 19860b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 19870b57cec5SDimitry Andric } 19880b57cec5SDimitry Andric 
19890b57cec5SDimitry Andric // See if we need to make a copy of the ICVs. 19900b57cec5SDimitry Andric int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 19910b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_nth.used) && 19920b57cec5SDimitry Andric (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 19930b57cec5SDimitry Andric nthreads_icv = __kmp_nested_nth.nth[level + 1]; 19940b57cec5SDimitry Andric } else { 19950b57cec5SDimitry Andric nthreads_icv = 0; // don't update 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric // Figure out the proc_bind_policy for the new team. 19990b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 2000349cc55cSDimitry Andric // proc_bind_default means don't update 2001349cc55cSDimitry Andric kmp_proc_bind_t proc_bind_icv = proc_bind_default; 20020b57cec5SDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 20030b57cec5SDimitry Andric proc_bind = proc_bind_false; 20040b57cec5SDimitry Andric } else { 20050b57cec5SDimitry Andric // No proc_bind clause specified; use current proc-bind-var for this 20060b57cec5SDimitry Andric // parallel region 2007349cc55cSDimitry Andric if (proc_bind == proc_bind_default) { 20080b57cec5SDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 20090b57cec5SDimitry Andric } 2010349cc55cSDimitry Andric // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND 2011349cc55cSDimitry Andric if (master_th->th.th_teams_microtask && 2012349cc55cSDimitry Andric microtask == (microtask_t)__kmp_teams_master) { 2013349cc55cSDimitry Andric proc_bind = __kmp_teams_proc_bind; 2014349cc55cSDimitry Andric } 20150b57cec5SDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 20160b57cec5SDimitry Andric This overrides proc-bind-var for this parallel region, but does not 20170b57cec5SDimitry Andric change proc-bind-var. 
*/ 20180b57cec5SDimitry Andric // Figure the value of proc-bind-var for the child threads. 20190b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 20200b57cec5SDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 20210b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 2022349cc55cSDimitry Andric // Do not modify the proc bind icv for the two teams construct forks 2023349cc55cSDimitry Andric // They just let the proc bind icv pass through 2024349cc55cSDimitry Andric if (!master_th->th.th_teams_microtask || 2025349cc55cSDimitry Andric !(microtask == (microtask_t)__kmp_teams_master || ap == NULL)) 20260b57cec5SDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric } 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric // Reset for next parallel region 20310b57cec5SDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) { 20340b57cec5SDimitry Andric kmp_internal_control_t new_icvs; 20350b57cec5SDimitry Andric copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 20360b57cec5SDimitry Andric new_icvs.next = NULL; 20370b57cec5SDimitry Andric if (nthreads_icv > 0) { 20380b57cec5SDimitry Andric new_icvs.nproc = nthreads_icv; 20390b57cec5SDimitry Andric } 20400b57cec5SDimitry Andric if (proc_bind_icv != proc_bind_default) { 20410b57cec5SDimitry Andric new_icvs.proc_bind = proc_bind_icv; 20420b57cec5SDimitry Andric } 20430b57cec5SDimitry Andric 20440b57cec5SDimitry Andric /* allocate a new parallel team */ 20450b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20460b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20470b57cec5SDimitry Andric #if OMPT_SUPPORT 20480b57cec5SDimitry Andric ompt_parallel_data, 20490b57cec5SDimitry Andric 
#endif 20500b57cec5SDimitry Andric proc_bind, &new_icvs, 20510b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2052349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2053349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); 20540b57cec5SDimitry Andric } else { 20550b57cec5SDimitry Andric /* allocate a new parallel team */ 20560b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20570b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20580b57cec5SDimitry Andric #if OMPT_SUPPORT 20590b57cec5SDimitry Andric ompt_parallel_data, 20600b57cec5SDimitry Andric #endif 20610b57cec5SDimitry Andric proc_bind, 20620b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, 20630b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 2064349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) 2065349cc55cSDimitry Andric copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, 2066349cc55cSDimitry Andric &master_th->th.th_current_task->td_icvs); 20670b57cec5SDimitry Andric } 20680b57cec5SDimitry Andric KF_TRACE( 20690b57cec5SDimitry Andric 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 20700b57cec5SDimitry Andric 20710b57cec5SDimitry Andric /* setup the new team */ 20720b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 20730b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 20740b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 20750b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 20760b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 20770b57cec5SDimitry Andric #if OMPT_SUPPORT 20780b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, 20790b57cec5SDimitry Andric return_address); 
20800b57cec5SDimitry Andric #endif 20810b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 20820b57cec5SDimitry Andric // TODO: parent_team->t.t_level == INT_MAX ??? 20830b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 20840b57cec5SDimitry Andric int new_level = parent_team->t.t_level + 1; 20850b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20860b57cec5SDimitry Andric new_level = parent_team->t.t_active_level + 1; 20870b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20880b57cec5SDimitry Andric } else { 20890b57cec5SDimitry Andric // AC: Do not increase parallel level at start of the teams construct 20900b57cec5SDimitry Andric int new_level = parent_team->t.t_level; 20910b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20920b57cec5SDimitry Andric new_level = parent_team->t.t_active_level; 20930b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2096fe6060f1SDimitry Andric // set primary thread's schedule as new run-time schedule 20970b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 21000b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); 21010b57cec5SDimitry Andric 21020b57cec5SDimitry Andric // Update the floating point rounding in the team if required. 
21030b57cec5SDimitry Andric propagateFPControl(team); 2104fe6060f1SDimitry Andric #if OMPD_SUPPORT 2105fe6060f1SDimitry Andric if (ompd_state & OMPD_ENABLE_BP) 2106fe6060f1SDimitry Andric ompd_bp_parallel_begin(); 2107fe6060f1SDimitry Andric #endif 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 2110fe6060f1SDimitry Andric // Set primary thread's task team to team's task team. Unless this is hot 2111fe6060f1SDimitry Andric // team, it should be NULL. 21120b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 21130b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 2114fe6060f1SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team " 21150b57cec5SDimitry Andric "%p, new task_team %p / team %p\n", 21160b57cec5SDimitry Andric __kmp_gtid_from_thread(master_th), 21170b57cec5SDimitry Andric master_th->th.th_task_team, parent_team, 21180b57cec5SDimitry Andric team->t.t_task_team[master_th->th.th_task_state], team)); 21190b57cec5SDimitry Andric 21200b57cec5SDimitry Andric if (active_level || master_th->th.th_task_team) { 2121fe6060f1SDimitry Andric // Take a memo of primary thread's task_state 21220b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 21230b57cec5SDimitry Andric if (master_th->th.th_task_state_top >= 21240b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz) { // increase size 21250b57cec5SDimitry Andric kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 21260b57cec5SDimitry Andric kmp_uint8 *old_stack, *new_stack; 21270b57cec5SDimitry Andric kmp_uint32 i; 21280b57cec5SDimitry Andric new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 21290b57cec5SDimitry Andric for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 21300b57cec5SDimitry Andric new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric for (i = 
master_th->th.th_task_state_stack_sz; i < new_size; 21330b57cec5SDimitry Andric ++i) { // zero-init rest of stack 21340b57cec5SDimitry Andric new_stack[i] = 0; 21350b57cec5SDimitry Andric } 21360b57cec5SDimitry Andric old_stack = master_th->th.th_task_state_memo_stack; 21370b57cec5SDimitry Andric master_th->th.th_task_state_memo_stack = new_stack; 21380b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz = new_size; 21390b57cec5SDimitry Andric __kmp_free(old_stack); 21400b57cec5SDimitry Andric } 2141fe6060f1SDimitry Andric // Store primary thread's task_state on stack 21420b57cec5SDimitry Andric master_th->th 21430b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top] = 21440b57cec5SDimitry Andric master_th->th.th_task_state; 21450b57cec5SDimitry Andric master_th->th.th_task_state_top++; 21460b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21470b57cec5SDimitry Andric if (master_th->th.th_hot_teams && 21480b57cec5SDimitry Andric active_level < __kmp_hot_teams_max_level && 21490b57cec5SDimitry Andric team == master_th->th.th_hot_teams[active_level].hot_team) { 2150fe6060f1SDimitry Andric // Restore primary thread's nested state if nested hot team 21510b57cec5SDimitry Andric master_th->th.th_task_state = 21520b57cec5SDimitry Andric master_th->th 21530b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top]; 21540b57cec5SDimitry Andric } else { 21550b57cec5SDimitry Andric #endif 21560b57cec5SDimitry Andric master_th->th.th_task_state = 0; 21570b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21580b57cec5SDimitry Andric } 21590b57cec5SDimitry Andric #endif 21600b57cec5SDimitry Andric } 21610b57cec5SDimitry Andric #if !KMP_NESTED_HOT_TEAMS 21620b57cec5SDimitry Andric KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 21630b57cec5SDimitry Andric (team == root->r.r_hot_team)); 21640b57cec5SDimitry Andric #endif 21650b57cec5SDimitry Andric } 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric KA_TRACE( 
21680b57cec5SDimitry Andric 20, 21690b57cec5SDimitry Andric ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 21700b57cec5SDimitry Andric gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 21710b57cec5SDimitry Andric team->t.t_nproc)); 21720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 21730b57cec5SDimitry Andric (team->t.t_master_tid == 0 && 21740b57cec5SDimitry Andric (team->t.t_parent == root->r.r_root_team || 21750b57cec5SDimitry Andric team->t.t_parent->t.t_serialized))); 21760b57cec5SDimitry Andric KMP_MB(); 21770b57cec5SDimitry Andric 21780b57cec5SDimitry Andric /* now, setup the arguments */ 21790b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 21800b57cec5SDimitry Andric if (ap) { 21810b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) { 218216794618SDimitry Andric void *new_argv = va_arg(kmp_va_deref(ap), void *); 21830b57cec5SDimitry Andric KMP_CHECK_UPDATE(*argv, new_argv); 21840b57cec5SDimitry Andric argv++; 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry Andric } else { 21870b57cec5SDimitry Andric for (i = 0; i < argc; ++i) { 21880b57cec5SDimitry Andric // Get args from parent team for teams construct 21890b57cec5SDimitry Andric KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 21900b57cec5SDimitry Andric } 21910b57cec5SDimitry Andric } 21920b57cec5SDimitry Andric 21930b57cec5SDimitry Andric /* now actually fork the threads */ 21940b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 21950b57cec5SDimitry Andric if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 21960b57cec5SDimitry Andric root->r.r_active = TRUE; 21970b57cec5SDimitry Andric 2198349cc55cSDimitry Andric __kmp_fork_team_threads(root, team, master_th, gtid, !ap); 21990b57cec5SDimitry Andric __kmp_setup_icv_copy(team, nthreads, 22000b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, loc); 22010b57cec5SDimitry Andric 22020b57cec5SDimitry Andric #if 
OMPT_SUPPORT 22030b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 22040b57cec5SDimitry Andric #endif 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric #if USE_ITT_BUILD 22090b57cec5SDimitry Andric if (team->t.t_active_level == 1 // only report frames at level 1 22100b57cec5SDimitry Andric && !master_th->th.th_teams_microtask) { // not in teams construct 22110b57cec5SDimitry Andric #if USE_ITT_NOTIFY 22120b57cec5SDimitry Andric if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 22130b57cec5SDimitry Andric (__kmp_forkjoin_frames_mode == 3 || 22140b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 1)) { 22150b57cec5SDimitry Andric kmp_uint64 tmp_time = 0; 22160b57cec5SDimitry Andric if (__itt_get_timestamp_ptr) 22170b57cec5SDimitry Andric tmp_time = __itt_get_timestamp(); 22180b57cec5SDimitry Andric // Internal fork - report frame begin 22190b57cec5SDimitry Andric master_th->th.th_frame_time = tmp_time; 22200b57cec5SDimitry Andric if (__kmp_forkjoin_frames_mode == 3) 22210b57cec5SDimitry Andric team->t.t_region_time = tmp_time; 22220b57cec5SDimitry Andric } else 22230b57cec5SDimitry Andric // only one notification scheme (either "submit" or "forking/joined", not both) 22240b57cec5SDimitry Andric #endif /* USE_ITT_NOTIFY */ 22250b57cec5SDimitry Andric if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 22260b57cec5SDimitry Andric __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 22270b57cec5SDimitry Andric // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
22280b57cec5SDimitry Andric __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 22290b57cec5SDimitry Andric } 22300b57cec5SDimitry Andric } 22310b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22320b57cec5SDimitry Andric 22330b57cec5SDimitry Andric /* now go on and do the work */ 22340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 22350b57cec5SDimitry Andric KMP_MB(); 22360b57cec5SDimitry Andric KF_TRACE(10, 22370b57cec5SDimitry Andric ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 22380b57cec5SDimitry Andric root, team, master_th, gtid)); 22390b57cec5SDimitry Andric 22400b57cec5SDimitry Andric #if USE_ITT_BUILD 22410b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 2242fe6060f1SDimitry Andric // create new stack stitching id before entering fork barrier 2243fe6060f1SDimitry Andric if (!enter_teams) { 2244fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL); 2245fe6060f1SDimitry Andric team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2246fe6060f1SDimitry Andric } else if (parent_team->t.t_serialized) { 2247fe6060f1SDimitry Andric // keep stack stitching id in the serialized parent_team; 2248fe6060f1SDimitry Andric // current team will be used for parallel inside the teams; 2249fe6060f1SDimitry Andric // if parent_team is active, then it already keeps stack stitching id 2250fe6060f1SDimitry Andric // for the league of teams 2251fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL); 2252fe6060f1SDimitry Andric parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); 2253fe6060f1SDimitry Andric } 22540b57cec5SDimitry Andric } 22550b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22560b57cec5SDimitry Andric 2257fe6060f1SDimitry Andric // AC: skip __kmp_internal_fork at teams construct, let only primary 22580b57cec5SDimitry Andric // threads execute 22590b57cec5SDimitry Andric if (ap) { 22600b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, team); 
22610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 22620b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 22630b57cec5SDimitry Andric root, team, master_th, gtid)); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric if (call_context == fork_context_gnu) { 22670b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 22680b57cec5SDimitry Andric return TRUE; 22690b57cec5SDimitry Andric } 22700b57cec5SDimitry Andric 2271fe6060f1SDimitry Andric /* Invoke microtask for PRIMARY thread */ 22720b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 22730b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 22740b57cec5SDimitry Andric } // END of timer KMP_fork_call block 22750b57cec5SDimitry Andric 22760b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22770b57cec5SDimitry Andric // If beginning a teams construct, then change thread state 22780b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 22790b57cec5SDimitry Andric if (!ap) { 22800b57cec5SDimitry Andric KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric #endif 22830b57cec5SDimitry Andric 22840b57cec5SDimitry Andric if (!team->t.t_invoke(gtid)) { 2285fe6060f1SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread"); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22890b57cec5SDimitry Andric // If was beginning of a teams construct, then reset thread state 22900b57cec5SDimitry Andric if (!ap) { 22910b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 22920b57cec5SDimitry Andric } 22930b57cec5SDimitry Andric #endif 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 22960b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 
22970b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 22980b57cec5SDimitry Andric 22990b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 23000b57cec5SDimitry Andric #if OMPT_SUPPORT 23010b57cec5SDimitry Andric if (ompt_enabled.enabled) { 23020b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 23030b57cec5SDimitry Andric } 23040b57cec5SDimitry Andric #endif 23050b57cec5SDimitry Andric 23060b57cec5SDimitry Andric return TRUE; 23070b57cec5SDimitry Andric } 23080b57cec5SDimitry Andric 23090b57cec5SDimitry Andric #if OMPT_SUPPORT 23100b57cec5SDimitry Andric static inline void __kmp_join_restore_state(kmp_info_t *thread, 23110b57cec5SDimitry Andric kmp_team_t *team) { 23120b57cec5SDimitry Andric // restore state outside the region 23130b57cec5SDimitry Andric thread->th.ompt_thread_info.state = 23140b57cec5SDimitry Andric ((team->t.t_serialized) ? ompt_state_work_serial 23150b57cec5SDimitry Andric : ompt_state_work_parallel); 23160b57cec5SDimitry Andric } 23170b57cec5SDimitry Andric 23180b57cec5SDimitry Andric static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, 23190b57cec5SDimitry Andric kmp_team_t *team, ompt_data_t *parallel_data, 2320489b1cf2SDimitry Andric int flags, void *codeptr) { 23210b57cec5SDimitry Andric ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 23220b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 23230b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 2324489b1cf2SDimitry Andric parallel_data, &(task_info->task_data), flags, codeptr); 23250b57cec5SDimitry Andric } 23260b57cec5SDimitry Andric 23270b57cec5SDimitry Andric task_info->frame.enter_frame = ompt_data_none; 23280b57cec5SDimitry Andric __kmp_join_restore_state(thread, team); 23290b57cec5SDimitry Andric } 23300b57cec5SDimitry Andric #endif 23310b57cec5SDimitry Andric 23320b57cec5SDimitry Andric void 
__kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                ,
                enum fork_context_e fork_context
#endif
                ,
                int exit_teams) {
  // Tear down the parallel region entered by __kmp_fork_call: run the join
  // barrier (unless exiting a teams construct), fire OMPT/ITT end events,
  // release worker threads back to the pool, and restore the primary
  // thread's bookkeeping to the parent team.
  //
  // loc          - source location of the join (stored into th_ident).
  // gtid         - global thread id of the joining (primary) thread.
  // fork_context - (OMPT builds only) which fork entry point was used;
  //                affects GOMP serialized handling and OMPT invoker flags.
  // exit_teams   - nonzero when leaving a teams construct; suppresses the
  //                internal join barrier for the team.
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  if (team->t.t_serialized) {
    // Serialized region: no workers to join; defer the real cleanup to
    // __kmpc_end_serialized_parallel after adjusting teams nesting counters.
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    // But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
#endif
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by master of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier: re-register the full thread count with the
      // barrier structure before reuse.
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  KMP_MB();

#if OMPT_SUPPORT
  // Capture parallel data / return address before the team may be freed below.
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
// AC: We need to leave the team structure intact at the end of parallel
// inside the teams construct, so that at the next parallel same (hot) team
// works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams primary thread in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      // A teams-master microtask ends an *initial* task; report team size 0
      // for it, per the flag computed below.
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif
  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally since the old team may be
     reallocated and the hierarchy appears inconsistent. it is actually safe to
     run and won't cause any bugs, but will cause those assertion failures. it's
     only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it.
*/ 26610b57cec5SDimitry Andric void __kmp_save_internal_controls(kmp_info_t *thread) { 26620b57cec5SDimitry Andric 26630b57cec5SDimitry Andric if (thread->th.th_team != thread->th.th_serial_team) { 26640b57cec5SDimitry Andric return; 26650b57cec5SDimitry Andric } 26660b57cec5SDimitry Andric if (thread->th.th_team->t.t_serialized > 1) { 26670b57cec5SDimitry Andric int push = 0; 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric if (thread->th.th_team->t.t_control_stack_top == NULL) { 26700b57cec5SDimitry Andric push = 1; 26710b57cec5SDimitry Andric } else { 26720b57cec5SDimitry Andric if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 26730b57cec5SDimitry Andric thread->th.th_team->t.t_serialized) { 26740b57cec5SDimitry Andric push = 1; 26750b57cec5SDimitry Andric } 26760b57cec5SDimitry Andric } 26770b57cec5SDimitry Andric if (push) { /* push a record on the serial team's stack */ 26780b57cec5SDimitry Andric kmp_internal_control_t *control = 26790b57cec5SDimitry Andric (kmp_internal_control_t *)__kmp_allocate( 26800b57cec5SDimitry Andric sizeof(kmp_internal_control_t)); 26810b57cec5SDimitry Andric 26820b57cec5SDimitry Andric copy_icvs(control, &thread->th.th_current_task->td_icvs); 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric control->serial_nesting_level = thread->th.th_team->t.t_serialized; 26850b57cec5SDimitry Andric 26860b57cec5SDimitry Andric control->next = thread->th.th_team->t.t_control_stack_top; 26870b57cec5SDimitry Andric thread->th.th_team->t.t_control_stack_top = control; 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric } 26900b57cec5SDimitry Andric } 26910b57cec5SDimitry Andric 26920b57cec5SDimitry Andric /* Changes set_nproc */ 26930b57cec5SDimitry Andric void __kmp_set_num_threads(int new_nth, int gtid) { 26940b57cec5SDimitry Andric kmp_info_t *thread; 26950b57cec5SDimitry Andric kmp_root_t *root; 26960b57cec5SDimitry Andric 26970b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = 
%d\n", new_nth)); 26980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 26990b57cec5SDimitry Andric 27000b57cec5SDimitry Andric if (new_nth < 1) 27010b57cec5SDimitry Andric new_nth = 1; 27020b57cec5SDimitry Andric else if (new_nth > __kmp_max_nth) 27030b57cec5SDimitry Andric new_nth = __kmp_max_nth; 27040b57cec5SDimitry Andric 27050b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 27060b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 27070b57cec5SDimitry Andric if (thread->th.th_current_task->td_icvs.nproc == new_nth) 27080b57cec5SDimitry Andric return; // nothing to do 27090b57cec5SDimitry Andric 27100b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 27110b57cec5SDimitry Andric 27120b57cec5SDimitry Andric set__nproc(thread, new_nth); 27130b57cec5SDimitry Andric 27140b57cec5SDimitry Andric // If this omp_set_num_threads() call will cause the hot team size to be 27150b57cec5SDimitry Andric // reduced (in the absence of a num_threads clause), then reduce it now, 27160b57cec5SDimitry Andric // rather than waiting for the next parallel region. 
27170b57cec5SDimitry Andric root = thread->th.th_root; 27180b57cec5SDimitry Andric if (__kmp_init_parallel && (!root->r.r_active) && 27190b57cec5SDimitry Andric (root->r.r_hot_team->t.t_nproc > new_nth) 27200b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 27210b57cec5SDimitry Andric && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 27220b57cec5SDimitry Andric #endif 27230b57cec5SDimitry Andric ) { 27240b57cec5SDimitry Andric kmp_team_t *hot_team = root->r.r_hot_team; 27250b57cec5SDimitry Andric int f; 27260b57cec5SDimitry Andric 27270b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 27280b57cec5SDimitry Andric 2729349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 2730349cc55cSDimitry Andric __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth); 2731349cc55cSDimitry Andric } 27320b57cec5SDimitry Andric // Release the extra threads we don't need any more. 27330b57cec5SDimitry Andric for (f = new_nth; f < hot_team->t.t_nproc; f++) { 27340b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 27350b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 27360b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should unref 27370b57cec5SDimitry Andric // task team. 
27380b57cec5SDimitry Andric hot_team->t.t_threads[f]->th.th_task_team = NULL; 27390b57cec5SDimitry Andric } 27400b57cec5SDimitry Andric __kmp_free_thread(hot_team->t.t_threads[f]); 27410b57cec5SDimitry Andric hot_team->t.t_threads[f] = NULL; 27420b57cec5SDimitry Andric } 27430b57cec5SDimitry Andric hot_team->t.t_nproc = new_nth; 27440b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 27450b57cec5SDimitry Andric if (thread->th.th_hot_teams) { 27460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); 27470b57cec5SDimitry Andric thread->th.th_hot_teams[0].hot_team_nth = new_nth; 27480b57cec5SDimitry Andric } 27490b57cec5SDimitry Andric #endif 27500b57cec5SDimitry Andric 2751349cc55cSDimitry Andric if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { 2752349cc55cSDimitry Andric hot_team->t.b->update_num_threads(new_nth); 2753349cc55cSDimitry Andric __kmp_add_threads_to_team(hot_team, new_nth); 2754349cc55cSDimitry Andric } 2755349cc55cSDimitry Andric 27560b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 27570b57cec5SDimitry Andric 27580b57cec5SDimitry Andric // Update the t_nproc field in the threads that are still active. 
  // (tail of __kmp_set_num_threads, whose head lies above this span)
  // Update the t_nproc field in the threads that are still active.
  for (f = 0; f < new_nth; f++) {
    KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
    hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
  }
  // Special flag in case omp_set_num_threads() call
  hot_team->t.t_size_changed = -1;
  }
}

/* Changes max_active_levels (the max-active-levels-var ICV of the calling
   thread's current task). Negative values are ignored with a warning; values
   above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped to the limit. */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed. The last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range: [ 0;
    // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
    // We allow a zero value. (implementation defined behavior)
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // Current upper limit is MAX_INT. (implementation defined behavior)
    // If the input exceeds the upper limit, we correct the input to be the
    // upper limit. (implementation defined behavior)
    // Actually, the flow should never get here until we use MAX_INT limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  // Snapshot the current ICVs before mutating them so they can be restored
  // when the enclosing region ends.
  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}

/* Gets max_active_levels from the calling thread's current-task ICVs. */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

// nteams-var per-device ICV. Non-positive requests are silently ignored.
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
// Setter for the teams-thread-limit-var per-device ICV; non-positive values
// are silently ignored.
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

// kmp_sched_t and sched_type are stored/compared as plain ints below;
// enforce that assumption at compile time.
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;
  // kmp_team_t *team;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
  orig_kind = kind;
  // Strip monotonic/nonmonotonic modifier bits; they are re-applied below.
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      // Map the user-facing standard kind onto the internal sched_type table.
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
    // kmp_sched_lower - 2 ];
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  // Restore the modifier bits stripped from the original request.
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}

/* Gets def_sched_var ICV values: translates the internal sched_type back to
   the user-facing kmp_sched_t kind plus its chunk (continues below). */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}

/* Returns the thread number of the ancestor team at the given nesting level,
   or -1 when the level is invalid / deeper than the current nesting.
   Level 0 is the implicit outermost (initial) task, hence thread 0. */
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  // Walk up the team tree, consuming serialized levels (t_serialized) before
  // moving to a parent team, until the requested level is reached.
  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  // A remaining serialized level means the ancestor was a serialized team:
  // thread number is 0; otherwise report the master tid within the parent.
  return (dd > 1) ? (0) : (team->t.t_master_tid);
}

/* Returns the size of the team at the given nesting level, or -1 when the
   level is invalid. Level 0 (initial/sequential part) has size 1.
   Mirrors the traversal in __kmp_get_ancestor_thread_num above. */
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <=
        tlevel) { // otherwise usual algorithm works (will not touch the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  while (ii >
         level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}

/* Builds the effective run-time schedule {kind, chunk} from the global
   settings, resolving generic STATIC/GUIDED into their detailed variants. */
kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine created because pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
  // independently. So one can get the updated schedule here.

  kmp_r_sched_t r_sched;

  // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
  // __kmp_guided. __kmp_sched should keep original value, so that user can set
  // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
  // different roots (even in OMP 2.5)
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}

/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
   at least argc number of *t_argv entries for the requested team.
   Small argument lists reuse the team's inline cache-line storage; larger
   ones are page-allocated on the heap with 2x growth headroom. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      // Grow to at least KMP_MIN_MALLOC_ARGV_ENTRIES, else double the request
      // to amortize future reallocations.
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}

/* Allocates the per-team bookkeeping arrays (threads, dispatch buffers,
   implicit task data) sized for max_nth threads (continues below). */
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ?
                                    __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

/* Frees the per-team arrays allocated by __kmp_allocate_team_arrays and
   nulls the pointers to guard against reuse. */
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

/* Grows the per-team arrays to max_nth entries, preserving the existing
   t_threads pointers (the other arrays are re-created from scratch). */
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  // Only t_nproc entries are live; copy them into the fresh t_threads array.
  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}

/* Builds the initial set of internal control variables from the runtime's
   global settings. NOTE: the initializer is positional and must stay in
   declaration order of kmp_internal_control_t. */
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level; //corresponds to value of th_team_serialized
      (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
      // adjustment of threads (per thread)
      (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
      // whether blocktime is explicitly set
      __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime
// intervals
#endif
      __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
      // next parallel region (per thread)
      // (use a max ub on value if __kmp_parallel_initialize not called yet)
      __kmp_cg_max_nth, // int thread_limit;
      __kmp_dflt_max_active_levels, // int max_active_levels; //internal control
      // for max_active_levels
      r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
      // {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

/* Copies the ICVs of the given team's primary thread's current task,
   resetting the serial nesting level and the chain pointer. */
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
/* One-time initialization of a root: sets up the root state, allocates the
   single-thread root team and the (resizable) hot team for this root. */
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
                          );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}

#ifdef KMP_DEBUG

// Singly-linked, id-sorted list node used only by the debug structure dump.
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
33470b57cec5SDimitry Andric ) { 33480b57cec5SDimitry Andric 33490b57cec5SDimitry Andric // List must terminate with item where both entry and next are NULL. 33500b57cec5SDimitry Andric // Team is added to the list only once. 33510b57cec5SDimitry Andric // List is sorted in ascending order by team id. 33520b57cec5SDimitry Andric // Team id is *not* a key. 33530b57cec5SDimitry Andric 33540b57cec5SDimitry Andric kmp_team_list_t l; 33550b57cec5SDimitry Andric 33560b57cec5SDimitry Andric KMP_DEBUG_ASSERT(list != NULL); 33570b57cec5SDimitry Andric if (team == NULL) { 33580b57cec5SDimitry Andric return; 33590b57cec5SDimitry Andric } 33600b57cec5SDimitry Andric 33610b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_parent); 33620b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_next_pool); 33630b57cec5SDimitry Andric 33640b57cec5SDimitry Andric // Search list for the team. 33650b57cec5SDimitry Andric l = list; 33660b57cec5SDimitry Andric while (l->next != NULL && l->entry != team) { 33670b57cec5SDimitry Andric l = l->next; 33680b57cec5SDimitry Andric } 33690b57cec5SDimitry Andric if (l->next != NULL) { 33700b57cec5SDimitry Andric return; // Team has been added before, exit. 33710b57cec5SDimitry Andric } 33720b57cec5SDimitry Andric 33730b57cec5SDimitry Andric // Team is not found. Search list again for insertion point. 33740b57cec5SDimitry Andric l = list; 33750b57cec5SDimitry Andric while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 33760b57cec5SDimitry Andric l = l->next; 33770b57cec5SDimitry Andric } 33780b57cec5SDimitry Andric 33790b57cec5SDimitry Andric // Insert team. 
33800b57cec5SDimitry Andric { 33810b57cec5SDimitry Andric kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 33820b57cec5SDimitry Andric sizeof(kmp_team_list_item_t)); 33830b57cec5SDimitry Andric *item = *l; 33840b57cec5SDimitry Andric l->entry = team; 33850b57cec5SDimitry Andric l->next = item; 33860b57cec5SDimitry Andric } 33870b57cec5SDimitry Andric } 33880b57cec5SDimitry Andric 33890b57cec5SDimitry Andric static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 33900b57cec5SDimitry Andric 33910b57cec5SDimitry Andric ) { 33920b57cec5SDimitry Andric __kmp_printf("%s", title); 33930b57cec5SDimitry Andric if (team != NULL) { 33940b57cec5SDimitry Andric __kmp_printf("%2x %p\n", team->t.t_id, team); 33950b57cec5SDimitry Andric } else { 33960b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 33970b57cec5SDimitry Andric } 33980b57cec5SDimitry Andric } 33990b57cec5SDimitry Andric 34000b57cec5SDimitry Andric static void __kmp_print_structure_thread(char const *title, 34010b57cec5SDimitry Andric kmp_info_p const *thread) { 34020b57cec5SDimitry Andric __kmp_printf("%s", title); 34030b57cec5SDimitry Andric if (thread != NULL) { 34040b57cec5SDimitry Andric __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 34050b57cec5SDimitry Andric } else { 34060b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 34070b57cec5SDimitry Andric } 34080b57cec5SDimitry Andric } 34090b57cec5SDimitry Andric 34100b57cec5SDimitry Andric void __kmp_print_structure(void) { 34110b57cec5SDimitry Andric 34120b57cec5SDimitry Andric kmp_team_list_t list; 34130b57cec5SDimitry Andric 34140b57cec5SDimitry Andric // Initialize list of teams. 
34150b57cec5SDimitry Andric list = 34160b57cec5SDimitry Andric (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 34170b57cec5SDimitry Andric list->entry = NULL; 34180b57cec5SDimitry Andric list->next = NULL; 34190b57cec5SDimitry Andric 34200b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nGlobal Thread " 34210b57cec5SDimitry Andric "Table\n------------------------------\n"); 34220b57cec5SDimitry Andric { 34230b57cec5SDimitry Andric int gtid; 34240b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34250b57cec5SDimitry Andric __kmp_printf("%2d", gtid); 34260b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34270b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_threads[gtid]); 34280b57cec5SDimitry Andric } 34290b57cec5SDimitry Andric if (__kmp_root != NULL) { 34300b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_root[gtid]); 34310b57cec5SDimitry Andric } 34320b57cec5SDimitry Andric __kmp_printf("\n"); 34330b57cec5SDimitry Andric } 34340b57cec5SDimitry Andric } 34350b57cec5SDimitry Andric 34360b57cec5SDimitry Andric // Print out __kmp_threads array. 
34370b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nThreads\n--------------------" 34380b57cec5SDimitry Andric "----------\n"); 34390b57cec5SDimitry Andric if (__kmp_threads != NULL) { 34400b57cec5SDimitry Andric int gtid; 34410b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34420b57cec5SDimitry Andric kmp_info_t const *thread = __kmp_threads[gtid]; 34430b57cec5SDimitry Andric if (thread != NULL) { 34440b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, thread); 34450b57cec5SDimitry Andric __kmp_printf(" Our Root: %p\n", thread->th.th_root); 34460b57cec5SDimitry Andric __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 34470b57cec5SDimitry Andric __kmp_print_structure_team(" Serial Team: ", 34480b57cec5SDimitry Andric thread->th.th_serial_team); 34490b57cec5SDimitry Andric __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 3450fe6060f1SDimitry Andric __kmp_print_structure_thread(" Primary: ", 34510b57cec5SDimitry Andric thread->th.th_team_master); 34520b57cec5SDimitry Andric __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 34530b57cec5SDimitry Andric __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 34540b57cec5SDimitry Andric __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 34550b57cec5SDimitry Andric __kmp_print_structure_thread(" Next in pool: ", 34560b57cec5SDimitry Andric thread->th.th_next_pool); 34570b57cec5SDimitry Andric __kmp_printf("\n"); 34580b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_team); 34590b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 34600b57cec5SDimitry Andric } 34610b57cec5SDimitry Andric } 34620b57cec5SDimitry Andric } else { 34630b57cec5SDimitry Andric __kmp_printf("Threads array is not allocated.\n"); 34640b57cec5SDimitry Andric } 34650b57cec5SDimitry Andric 34660b57cec5SDimitry Andric // Print out __kmp_root array. 
34670b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nUbers\n----------------------" 34680b57cec5SDimitry Andric "--------\n"); 34690b57cec5SDimitry Andric if (__kmp_root != NULL) { 34700b57cec5SDimitry Andric int gtid; 34710b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 34720b57cec5SDimitry Andric kmp_root_t const *root = __kmp_root[gtid]; 34730b57cec5SDimitry Andric if (root != NULL) { 34740b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, root); 34750b57cec5SDimitry Andric __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 34760b57cec5SDimitry Andric __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 34770b57cec5SDimitry Andric __kmp_print_structure_thread(" Uber Thread: ", 34780b57cec5SDimitry Andric root->r.r_uber_thread); 34790b57cec5SDimitry Andric __kmp_printf(" Active?: %2d\n", root->r.r_active); 34800b57cec5SDimitry Andric __kmp_printf(" In Parallel: %2d\n", 34810b57cec5SDimitry Andric KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); 34820b57cec5SDimitry Andric __kmp_printf("\n"); 34830b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_root_team); 34840b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_hot_team); 34850b57cec5SDimitry Andric } 34860b57cec5SDimitry Andric } 34870b57cec5SDimitry Andric } else { 34880b57cec5SDimitry Andric __kmp_printf("Ubers array is not allocated.\n"); 34890b57cec5SDimitry Andric } 34900b57cec5SDimitry Andric 34910b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nTeams\n----------------------" 34920b57cec5SDimitry Andric "--------\n"); 34930b57cec5SDimitry Andric while (list->next != NULL) { 34940b57cec5SDimitry Andric kmp_team_p const *team = list->entry; 34950b57cec5SDimitry Andric int i; 34960b57cec5SDimitry Andric __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 34970b57cec5SDimitry Andric __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 
3498fe6060f1SDimitry Andric __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid); 34990b57cec5SDimitry Andric __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 35000b57cec5SDimitry Andric __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 35010b57cec5SDimitry Andric __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 35020b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; ++i) { 35030b57cec5SDimitry Andric __kmp_printf(" Thread %2d: ", i); 35040b57cec5SDimitry Andric __kmp_print_structure_thread("", team->t.t_threads[i]); 35050b57cec5SDimitry Andric } 35060b57cec5SDimitry Andric __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 35070b57cec5SDimitry Andric __kmp_printf("\n"); 35080b57cec5SDimitry Andric list = list->next; 35090b57cec5SDimitry Andric } 35100b57cec5SDimitry Andric 35110b57cec5SDimitry Andric // Print out __kmp_thread_pool and __kmp_team_pool. 35120b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nPools\n----------------------" 35130b57cec5SDimitry Andric "--------\n"); 35140b57cec5SDimitry Andric __kmp_print_structure_thread("Thread pool: ", 35150b57cec5SDimitry Andric CCAST(kmp_info_t *, __kmp_thread_pool)); 35160b57cec5SDimitry Andric __kmp_print_structure_team("Team pool: ", 35170b57cec5SDimitry Andric CCAST(kmp_team_t *, __kmp_team_pool)); 35180b57cec5SDimitry Andric __kmp_printf("\n"); 35190b57cec5SDimitry Andric 35200b57cec5SDimitry Andric // Free team list. 
35210b57cec5SDimitry Andric while (list != NULL) { 35220b57cec5SDimitry Andric kmp_team_list_item_t *item = list; 35230b57cec5SDimitry Andric list = list->next; 35240b57cec5SDimitry Andric KMP_INTERNAL_FREE(item); 35250b57cec5SDimitry Andric } 35260b57cec5SDimitry Andric } 35270b57cec5SDimitry Andric 35280b57cec5SDimitry Andric #endif 35290b57cec5SDimitry Andric 35300b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35310b57cec5SDimitry Andric // Stuff for per-thread fast random number generator 35320b57cec5SDimitry Andric // Table of primes 35330b57cec5SDimitry Andric static const unsigned __kmp_primes[] = { 35340b57cec5SDimitry Andric 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 35350b57cec5SDimitry Andric 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 35360b57cec5SDimitry Andric 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 35370b57cec5SDimitry Andric 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 35380b57cec5SDimitry Andric 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 35390b57cec5SDimitry Andric 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 35400b57cec5SDimitry Andric 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 35410b57cec5SDimitry Andric 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 35420b57cec5SDimitry Andric 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 35430b57cec5SDimitry Andric 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 35440b57cec5SDimitry Andric 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 35450b57cec5SDimitry Andric 35460b57cec5SDimitry Andric //--------------------------------------------------------------------------- 35470b57cec5SDimitry Andric // __kmp_get_random: Get a random number using a linear congruential method. 
35480b57cec5SDimitry Andric unsigned short __kmp_get_random(kmp_info_t *thread) { 35490b57cec5SDimitry Andric unsigned x = thread->th.th_x; 3550e8d8bef9SDimitry Andric unsigned short r = (unsigned short)(x >> 16); 35510b57cec5SDimitry Andric 35520b57cec5SDimitry Andric thread->th.th_x = x * thread->th.th_a + 1; 35530b57cec5SDimitry Andric 35540b57cec5SDimitry Andric KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", 35550b57cec5SDimitry Andric thread->th.th_info.ds.ds_tid, r)); 35560b57cec5SDimitry Andric 35570b57cec5SDimitry Andric return r; 35580b57cec5SDimitry Andric } 35590b57cec5SDimitry Andric //-------------------------------------------------------- 35600b57cec5SDimitry Andric // __kmp_init_random: Initialize a random number generator 35610b57cec5SDimitry Andric void __kmp_init_random(kmp_info_t *thread) { 35620b57cec5SDimitry Andric unsigned seed = thread->th.th_info.ds.ds_tid; 35630b57cec5SDimitry Andric 35640b57cec5SDimitry Andric thread->th.th_a = 35650b57cec5SDimitry Andric __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))]; 35660b57cec5SDimitry Andric thread->th.th_x = (seed + 1) * thread->th.th_a + 1; 35670b57cec5SDimitry Andric KA_TRACE(30, 35680b57cec5SDimitry Andric ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a)); 35690b57cec5SDimitry Andric } 35700b57cec5SDimitry Andric 35710b57cec5SDimitry Andric #if KMP_OS_WINDOWS 35720b57cec5SDimitry Andric /* reclaim array entries for root threads that are already dead, returns number 35730b57cec5SDimitry Andric * reclaimed */ 35740b57cec5SDimitry Andric static int __kmp_reclaim_dead_roots(void) { 35750b57cec5SDimitry Andric int i, r = 0; 35760b57cec5SDimitry Andric 35770b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; ++i) { 35780b57cec5SDimitry Andric if (KMP_UBER_GTID(i) && 35790b57cec5SDimitry Andric !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && 35800b57cec5SDimitry Andric !__kmp_root[i] 35810b57cec5SDimitry Andric 
->r.r_active) { // AC: reclaim only roots died in non-active state 35820b57cec5SDimitry Andric r += __kmp_unregister_root_other_thread(i); 35830b57cec5SDimitry Andric } 35840b57cec5SDimitry Andric } 35850b57cec5SDimitry Andric return r; 35860b57cec5SDimitry Andric } 35870b57cec5SDimitry Andric #endif 35880b57cec5SDimitry Andric 35890b57cec5SDimitry Andric /* This function attempts to create free entries in __kmp_threads and 35900b57cec5SDimitry Andric __kmp_root, and returns the number of free entries generated. 35910b57cec5SDimitry Andric 35920b57cec5SDimitry Andric For Windows* OS static library, the first mechanism used is to reclaim array 35930b57cec5SDimitry Andric entries for root threads that are already dead. 35940b57cec5SDimitry Andric 35950b57cec5SDimitry Andric On all platforms, expansion is attempted on the arrays __kmp_threads_ and 35960b57cec5SDimitry Andric __kmp_root, with appropriate update to __kmp_threads_capacity. Array 35970b57cec5SDimitry Andric capacity is increased by doubling with clipping to __kmp_tp_capacity, if 35980b57cec5SDimitry Andric threadprivate cache array has been created. Synchronization with 35990b57cec5SDimitry Andric __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. 36000b57cec5SDimitry Andric 36010b57cec5SDimitry Andric After any dead root reclamation, if the clipping value allows array expansion 36020b57cec5SDimitry Andric to result in the generation of a total of nNeed free slots, the function does 36030b57cec5SDimitry Andric that expansion. If not, nothing is done beyond the possible initial root 36040b57cec5SDimitry Andric thread reclamation. 36050b57cec5SDimitry Andric 36060b57cec5SDimitry Andric If any argument is negative, the behavior is undefined. 
*/ 36070b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed) { 36080b57cec5SDimitry Andric int added = 0; 36090b57cec5SDimitry Andric int minimumRequiredCapacity; 36100b57cec5SDimitry Andric int newCapacity; 36110b57cec5SDimitry Andric kmp_info_t **newThreads; 36120b57cec5SDimitry Andric kmp_root_t **newRoot; 36130b57cec5SDimitry Andric 36140b57cec5SDimitry Andric // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so 36150b57cec5SDimitry Andric // resizing __kmp_threads does not need additional protection if foreign 36160b57cec5SDimitry Andric // threads are present 36170b57cec5SDimitry Andric 36180b57cec5SDimitry Andric #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB 36190b57cec5SDimitry Andric /* only for Windows static library */ 36200b57cec5SDimitry Andric /* reclaim array entries for root threads that are already dead */ 36210b57cec5SDimitry Andric added = __kmp_reclaim_dead_roots(); 36220b57cec5SDimitry Andric 36230b57cec5SDimitry Andric if (nNeed) { 36240b57cec5SDimitry Andric nNeed -= added; 36250b57cec5SDimitry Andric if (nNeed < 0) 36260b57cec5SDimitry Andric nNeed = 0; 36270b57cec5SDimitry Andric } 36280b57cec5SDimitry Andric #endif 36290b57cec5SDimitry Andric if (nNeed <= 0) 36300b57cec5SDimitry Andric return added; 36310b57cec5SDimitry Andric 36320b57cec5SDimitry Andric // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If 36330b57cec5SDimitry Andric // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the 36340b57cec5SDimitry Andric // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become 36350b57cec5SDimitry Andric // > __kmp_max_nth in one of two ways: 36360b57cec5SDimitry Andric // 36370b57cec5SDimitry Andric // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] 36385ffd83dbSDimitry Andric // may not be reused by another thread, so we may need to increase 36390b57cec5SDimitry Andric // __kmp_threads_capacity to __kmp_max_nth + 1. 
36400b57cec5SDimitry Andric // 36410b57cec5SDimitry Andric // 2) New foreign root(s) are encountered. We always register new foreign 36420b57cec5SDimitry Andric // roots. This may cause a smaller # of threads to be allocated at 36430b57cec5SDimitry Andric // subsequent parallel regions, but the worker threads hang around (and 36440b57cec5SDimitry Andric // eventually go to sleep) and need slots in the __kmp_threads[] array. 36450b57cec5SDimitry Andric // 36460b57cec5SDimitry Andric // Anyway, that is the reason for moving the check to see if 36470b57cec5SDimitry Andric // __kmp_max_nth was exceeded into __kmp_reserve_threads() 36480b57cec5SDimitry Andric // instead of having it performed here. -BB 36490b57cec5SDimitry Andric 36500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity); 36510b57cec5SDimitry Andric 36520b57cec5SDimitry Andric /* compute expansion headroom to check if we can expand */ 36530b57cec5SDimitry Andric if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) { 36540b57cec5SDimitry Andric /* possible expansion too small -- give up */ 36550b57cec5SDimitry Andric return added; 36560b57cec5SDimitry Andric } 36570b57cec5SDimitry Andric minimumRequiredCapacity = __kmp_threads_capacity + nNeed; 36580b57cec5SDimitry Andric 36590b57cec5SDimitry Andric newCapacity = __kmp_threads_capacity; 36600b57cec5SDimitry Andric do { 36610b57cec5SDimitry Andric newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? 
(newCapacity << 1) 36620b57cec5SDimitry Andric : __kmp_sys_max_nth; 36630b57cec5SDimitry Andric } while (newCapacity < minimumRequiredCapacity); 36640b57cec5SDimitry Andric newThreads = (kmp_info_t **)__kmp_allocate( 36650b57cec5SDimitry Andric (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE); 36660b57cec5SDimitry Andric newRoot = 36670b57cec5SDimitry Andric (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity); 36680b57cec5SDimitry Andric KMP_MEMCPY(newThreads, __kmp_threads, 36690b57cec5SDimitry Andric __kmp_threads_capacity * sizeof(kmp_info_t *)); 36700b57cec5SDimitry Andric KMP_MEMCPY(newRoot, __kmp_root, 36710b57cec5SDimitry Andric __kmp_threads_capacity * sizeof(kmp_root_t *)); 367281ad6265SDimitry Andric // Put old __kmp_threads array on a list. Any ongoing references to the old 367381ad6265SDimitry Andric // list will be valid. This list is cleaned up at library shutdown. 367481ad6265SDimitry Andric kmp_old_threads_list_t *node = 367581ad6265SDimitry Andric (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t)); 367681ad6265SDimitry Andric node->threads = __kmp_threads; 367781ad6265SDimitry Andric node->next = __kmp_old_threads_list; 367881ad6265SDimitry Andric __kmp_old_threads_list = node; 36790b57cec5SDimitry Andric 36800b57cec5SDimitry Andric *(kmp_info_t * *volatile *)&__kmp_threads = newThreads; 36810b57cec5SDimitry Andric *(kmp_root_t * *volatile *)&__kmp_root = newRoot; 36820b57cec5SDimitry Andric added += newCapacity - __kmp_threads_capacity; 36830b57cec5SDimitry Andric *(volatile int *)&__kmp_threads_capacity = newCapacity; 36840b57cec5SDimitry Andric 36850b57cec5SDimitry Andric if (newCapacity > __kmp_tp_capacity) { 36860b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); 36870b57cec5SDimitry Andric if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) { 36880b57cec5SDimitry Andric __kmp_threadprivate_resize_cache(newCapacity); 36890b57cec5SDimitry Andric } 
else { // increase __kmp_tp_capacity to correspond with kmp_threads size 36900b57cec5SDimitry Andric *(volatile int *)&__kmp_tp_capacity = newCapacity; 36910b57cec5SDimitry Andric } 36920b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); 36930b57cec5SDimitry Andric } 36940b57cec5SDimitry Andric 36950b57cec5SDimitry Andric return added; 36960b57cec5SDimitry Andric } 36970b57cec5SDimitry Andric 36980b57cec5SDimitry Andric /* Register the current thread as a root thread and obtain our gtid. We must 36990b57cec5SDimitry Andric have the __kmp_initz_lock held at this point. Argument TRUE only if are the 37000b57cec5SDimitry Andric thread that calls from __kmp_do_serial_initialize() */ 37010b57cec5SDimitry Andric int __kmp_register_root(int initial_thread) { 37020b57cec5SDimitry Andric kmp_info_t *root_thread; 37030b57cec5SDimitry Andric kmp_root_t *root; 37040b57cec5SDimitry Andric int gtid; 37050b57cec5SDimitry Andric int capacity; 37060b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 37070b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: entered\n")); 37080b57cec5SDimitry Andric KMP_MB(); 37090b57cec5SDimitry Andric 37100b57cec5SDimitry Andric /* 2007-03-02: 37110b57cec5SDimitry Andric If initial thread did not invoke OpenMP RTL yet, and this thread is not an 37120b57cec5SDimitry Andric initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not 37130b57cec5SDimitry Andric work as expected -- it may return false (that means there is at least one 37140b57cec5SDimitry Andric empty slot in __kmp_threads array), but it is possible the only free slot 37150b57cec5SDimitry Andric is #0, which is reserved for initial thread and so cannot be used for this 37160b57cec5SDimitry Andric one. Following code workarounds this bug. 
37170b57cec5SDimitry Andric 37180b57cec5SDimitry Andric However, right solution seems to be not reserving slot #0 for initial 37190b57cec5SDimitry Andric thread because: 37200b57cec5SDimitry Andric (1) there is no magic in slot #0, 37210b57cec5SDimitry Andric (2) we cannot detect initial thread reliably (the first thread which does 37220b57cec5SDimitry Andric serial initialization may be not a real initial thread). 37230b57cec5SDimitry Andric */ 37240b57cec5SDimitry Andric capacity = __kmp_threads_capacity; 37250b57cec5SDimitry Andric if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 37260b57cec5SDimitry Andric --capacity; 37270b57cec5SDimitry Andric } 37280b57cec5SDimitry Andric 3729d409305fSDimitry Andric // If it is not for initializing the hidden helper team, we need to take 3730d409305fSDimitry Andric // __kmp_hidden_helper_threads_num out of the capacity because it is included 3731d409305fSDimitry Andric // in __kmp_threads_capacity. 3732d409305fSDimitry Andric if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { 3733d409305fSDimitry Andric capacity -= __kmp_hidden_helper_threads_num; 3734d409305fSDimitry Andric } 3735d409305fSDimitry Andric 37360b57cec5SDimitry Andric /* see if there are too many threads */ 37370b57cec5SDimitry Andric if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { 37380b57cec5SDimitry Andric if (__kmp_tp_cached) { 37390b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), 37400b57cec5SDimitry Andric KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 37410b57cec5SDimitry Andric KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 37420b57cec5SDimitry Andric } else { 37430b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), 37440b57cec5SDimitry Andric __kmp_msg_null); 37450b57cec5SDimitry Andric } 37460b57cec5SDimitry Andric } 37470b57cec5SDimitry Andric 3748e8d8bef9SDimitry Andric // When hidden helper task is enabled, __kmp_threads 
is organized as follows: 3749e8d8bef9SDimitry Andric // 0: initial thread, also a regular OpenMP thread. 3750e8d8bef9SDimitry Andric // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads. 3751e8d8bef9SDimitry Andric // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for 3752e8d8bef9SDimitry Andric // regular OpenMP threads. 3753e8d8bef9SDimitry Andric if (TCR_4(__kmp_init_hidden_helper_threads)) { 3754e8d8bef9SDimitry Andric // Find an available thread slot for hidden helper thread. Slots for hidden 3755e8d8bef9SDimitry Andric // helper threads start from 1 to __kmp_hidden_helper_threads_num. 3756e8d8bef9SDimitry Andric for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL && 3757e8d8bef9SDimitry Andric gtid <= __kmp_hidden_helper_threads_num; 37580b57cec5SDimitry Andric gtid++) 37590b57cec5SDimitry Andric ; 3760e8d8bef9SDimitry Andric KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num); 3761e8d8bef9SDimitry Andric KA_TRACE(1, ("__kmp_register_root: found slot in threads array for " 3762e8d8bef9SDimitry Andric "hidden helper thread: T#%d\n", 3763e8d8bef9SDimitry Andric gtid)); 3764e8d8bef9SDimitry Andric } else { 3765e8d8bef9SDimitry Andric /* find an available thread slot */ 3766e8d8bef9SDimitry Andric // Don't reassign the zero slot since we need that to only be used by 3767e8d8bef9SDimitry Andric // initial thread. Slots for hidden helper threads should also be skipped. 
3768d409305fSDimitry Andric if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 3769e8d8bef9SDimitry Andric gtid = 0; 3770e8d8bef9SDimitry Andric } else { 3771e8d8bef9SDimitry Andric for (gtid = __kmp_hidden_helper_threads_num + 1; 3772e8d8bef9SDimitry Andric TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++) 3773e8d8bef9SDimitry Andric ; 3774e8d8bef9SDimitry Andric } 3775e8d8bef9SDimitry Andric KA_TRACE( 3776e8d8bef9SDimitry Andric 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); 37770b57cec5SDimitry Andric KMP_ASSERT(gtid < __kmp_threads_capacity); 3778e8d8bef9SDimitry Andric } 37790b57cec5SDimitry Andric 37800b57cec5SDimitry Andric /* update global accounting */ 37810b57cec5SDimitry Andric __kmp_all_nth++; 37820b57cec5SDimitry Andric TCW_4(__kmp_nth, __kmp_nth + 1); 37830b57cec5SDimitry Andric 37840b57cec5SDimitry Andric // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 37850b57cec5SDimitry Andric // numbers of procs, and method #2 (keyed API call) for higher numbers. 
37860b57cec5SDimitry Andric if (__kmp_adjust_gtid_mode) { 37870b57cec5SDimitry Andric if (__kmp_all_nth >= __kmp_tls_gtid_min) { 37880b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 2) { 37890b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 2); 37900b57cec5SDimitry Andric } 37910b57cec5SDimitry Andric } else { 37920b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) != 1) { 37930b57cec5SDimitry Andric TCW_4(__kmp_gtid_mode, 1); 37940b57cec5SDimitry Andric } 37950b57cec5SDimitry Andric } 37960b57cec5SDimitry Andric } 37970b57cec5SDimitry Andric 37980b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 37990b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary */ 38000b57cec5SDimitry Andric /* Middle initialization might not have occurred yet */ 38010b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 38020b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 38030b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 38040b57cec5SDimitry Andric } 38050b57cec5SDimitry Andric } 38060b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 38070b57cec5SDimitry Andric 38080b57cec5SDimitry Andric /* setup this new hierarchy */ 38090b57cec5SDimitry Andric if (!(root = __kmp_root[gtid])) { 38100b57cec5SDimitry Andric root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); 38110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(!root->r.r_root_team); 38120b57cec5SDimitry Andric } 38130b57cec5SDimitry Andric 38140b57cec5SDimitry Andric #if KMP_STATS_ENABLED 38150b57cec5SDimitry Andric // Initialize stats as soon as possible (right after gtid assignment). 
38160b57cec5SDimitry Andric __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 38170b57cec5SDimitry Andric __kmp_stats_thread_ptr->startLife(); 38180b57cec5SDimitry Andric KMP_SET_THREAD_STATE(SERIAL_REGION); 38190b57cec5SDimitry Andric KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 38200b57cec5SDimitry Andric #endif 38210b57cec5SDimitry Andric __kmp_initialize_root(root); 38220b57cec5SDimitry Andric 38230b57cec5SDimitry Andric /* setup new root thread structure */ 38240b57cec5SDimitry Andric if (root->r.r_uber_thread) { 38250b57cec5SDimitry Andric root_thread = root->r.r_uber_thread; 38260b57cec5SDimitry Andric } else { 38270b57cec5SDimitry Andric root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 38280b57cec5SDimitry Andric if (__kmp_storage_map) { 38290b57cec5SDimitry Andric __kmp_print_thread_storage_map(root_thread, gtid); 38300b57cec5SDimitry Andric } 38310b57cec5SDimitry Andric root_thread->th.th_info.ds.ds_gtid = gtid; 38320b57cec5SDimitry Andric #if OMPT_SUPPORT 38330b57cec5SDimitry Andric root_thread->th.ompt_thread_info.thread_data = ompt_data_none; 38340b57cec5SDimitry Andric #endif 38350b57cec5SDimitry Andric root_thread->th.th_root = root; 38360b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 38370b57cec5SDimitry Andric root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 38380b57cec5SDimitry Andric } 38390b57cec5SDimitry Andric #if USE_FAST_MEMORY 38400b57cec5SDimitry Andric __kmp_initialize_fast_memory(root_thread); 38410b57cec5SDimitry Andric #endif /* USE_FAST_MEMORY */ 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andric #if KMP_USE_BGET 38440b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 38450b57cec5SDimitry Andric __kmp_initialize_bget(root_thread); 38460b57cec5SDimitry Andric #endif 38470b57cec5SDimitry Andric __kmp_init_random(root_thread); // Initialize random number generator 38480b57cec5SDimitry Andric } 38490b57cec5SDimitry Andric 38500b57cec5SDimitry Andric /* setup 
the serial team held in reserve by the root thread */ 38510b57cec5SDimitry Andric if (!root_thread->th.th_serial_team) { 38520b57cec5SDimitry Andric kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 38530b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 38540b57cec5SDimitry Andric root_thread->th.th_serial_team = __kmp_allocate_team( 38550b57cec5SDimitry Andric root, 1, 1, 38560b57cec5SDimitry Andric #if OMPT_SUPPORT 38570b57cec5SDimitry Andric ompt_data_none, // root parallel id 38580b57cec5SDimitry Andric #endif 38590b57cec5SDimitry Andric proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 38600b57cec5SDimitry Andric } 38610b57cec5SDimitry Andric KMP_ASSERT(root_thread->th.th_serial_team); 38620b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 38630b57cec5SDimitry Andric root_thread->th.th_serial_team)); 38640b57cec5SDimitry Andric 38650b57cec5SDimitry Andric /* drop root_thread into place */ 38660b57cec5SDimitry Andric TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 38670b57cec5SDimitry Andric 38680b57cec5SDimitry Andric root->r.r_root_team->t.t_threads[0] = root_thread; 38690b57cec5SDimitry Andric root->r.r_hot_team->t.t_threads[0] = root_thread; 38700b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 38710b57cec5SDimitry Andric // AC: the team created in reserve, not for execution (it is unused for now). 
38720b57cec5SDimitry Andric root_thread->th.th_serial_team->t.t_serialized = 0; 38730b57cec5SDimitry Andric root->r.r_uber_thread = root_thread; 38740b57cec5SDimitry Andric 38750b57cec5SDimitry Andric /* initialize the thread, get it ready to go */ 38760b57cec5SDimitry Andric __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 38770b57cec5SDimitry Andric TCW_4(__kmp_init_gtid, TRUE); 38780b57cec5SDimitry Andric 3879fe6060f1SDimitry Andric /* prepare the primary thread for get_gtid() */ 38800b57cec5SDimitry Andric __kmp_gtid_set_specific(gtid); 38810b57cec5SDimitry Andric 38820b57cec5SDimitry Andric #if USE_ITT_BUILD 38830b57cec5SDimitry Andric __kmp_itt_thread_name(gtid); 38840b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 38850b57cec5SDimitry Andric 38860b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 38870b57cec5SDimitry Andric __kmp_gtid = gtid; 38880b57cec5SDimitry Andric #endif 38890b57cec5SDimitry Andric __kmp_create_worker(gtid, root_thread, __kmp_stksize); 38900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 38910b57cec5SDimitry Andric 38920b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 38930b57cec5SDimitry Andric "plain=%u\n", 38940b57cec5SDimitry Andric gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 38950b57cec5SDimitry Andric root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 38960b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE)); 38970b57cec5SDimitry Andric { // Initialize barrier data. 
38980b57cec5SDimitry Andric int b; 38990b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 39000b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 39010b57cec5SDimitry Andric #if USE_DEBUGGER 39020b57cec5SDimitry Andric root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 39030b57cec5SDimitry Andric #endif 39040b57cec5SDimitry Andric } 39050b57cec5SDimitry Andric } 39060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 39070b57cec5SDimitry Andric KMP_INIT_BARRIER_STATE); 39080b57cec5SDimitry Andric 39090b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 39100b57cec5SDimitry Andric root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 39110b57cec5SDimitry Andric root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 39120b57cec5SDimitry Andric root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 39130b57cec5SDimitry Andric root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 39140b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 39150b57cec5SDimitry Andric root_thread->th.th_def_allocator = __kmp_def_allocator; 39160b57cec5SDimitry Andric root_thread->th.th_prev_level = 0; 39170b57cec5SDimitry Andric root_thread->th.th_prev_num_threads = 1; 39180b57cec5SDimitry Andric 39190b57cec5SDimitry Andric kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t)); 39200b57cec5SDimitry Andric tmp->cg_root = root_thread; 39210b57cec5SDimitry Andric tmp->cg_thread_limit = __kmp_cg_max_nth; 39220b57cec5SDimitry Andric tmp->cg_nthreads = 1; 39230b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with" 39240b57cec5SDimitry Andric " cg_nthreads init to 1\n", 39250b57cec5SDimitry Andric root_thread, tmp)); 39260b57cec5SDimitry Andric tmp->up = NULL; 39270b57cec5SDimitry Andric root_thread->th.th_cg_roots = tmp; 39280b57cec5SDimitry Andric 39290b57cec5SDimitry Andric __kmp_root_counter++; 39300b57cec5SDimitry Andric 
#if OMPT_SUPPORT
  // Report OMPT thread-begin and implicit-task-begin events for a
  // newly-registered (non-initial) root thread.
  if (!initial_thread && ompt_enabled.enabled) {

    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }
#endif
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}

#if KMP_NESTED_HOT_TEAMS
// Recursively free the nested hot teams cached below thread `thr`, starting at
// nesting depth `level` and recursing while level < max_level - 1.
// Returns the number of __kmp_threads entries released: nth - 1 for this
// team's workers (the primary thread is not freed) plus whatever the
// recursive calls release.  Returns 0 if `thr` has no hot team at `level`.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  // Nothing cached at this level -- nothing to free.
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      // Accumulate threads freed from deeper nesting levels.
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      // NOTE(review): index 0 (the primary) is skipped here -- presumably its
      // th_hot_teams array is released by the caller; confirm against
      // __kmp_reset_root.
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif

// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
// Tears down a root: frees the root team and hot team (and any nested hot
// teams), waits for task-team references to drain, reports OMPT/OMPD
// thread-end events, releases the contention-group node when this root was
// its last member, and reaps the uber thread.
// Returns the number of __kmp_threads entries directly and indirectly freed.
// Caller must hold __kmp_forkjoin_lock and the root must be inactive
// (asserted below).
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc; // threads freed, starting with the hot team
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        // Count threads released from nesting levels below level 1.
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  // Report implicit-task-end and thread-end for the departing root.
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  // Pre-decrement value tells us whether this root was the last CG member.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put root thread to __kmp_thread_pool, so we have to reap it
  // instead of freeing.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}

// Unregisters the calling root thread during a normal close: waits out any
// outstanding proxy / hidden-helper tasks, then resets the root via
// __kmp_reset_root().  Acquires and releases __kmp_forkjoin_lock itself;
// returns early (a no-op) if the runtime is already shut down.
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result.
 */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  // Same sanity checks as __kmp_unregister_root_current_thread: gtid must
  // denote an inactive uber (root) thread.
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif

#if KMP_DEBUG
// Debug-only helper: prints the calling thread's gtid/tid, its team, serial
// team, current implicit task, and the parent of the implicit task.
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG

/* TODO optimize with one big
   memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible */
// Bind thread `this_thr` (global id `gtid`) to `team` as member `tid`:
// caches team facts in the thread, initializes its implicit task, installs or
// migrates its contention-group (CG) root, sets up the dynamic-dispatch
// buffers, and zero-initializes the task-state memo stack on first use.
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in
     kmp_allocate_thread/create_worker.
     this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
  // TODO: Initialize ICVs from parent; GEH - isn't that already done in
  // __kmp_initialize_team()?

  /* TODO no worksharing in speculative threads */
  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  // Lazily allocate the thread's private common-block table (threadprivate
  // support); only done the first time this thread is initialized.
  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // Primary thread's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make new thread's CG root same as primary thread's
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // worker changes CG, need to check if old CG should be freed
      // (pre-decrement value == 1 means this thread was the last member)
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment new thread's CG root's counter to add the new thread
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    // Adopt the CG's thread limit into this thread's current ICVs.
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    // A single-thread team only ever needs one dispatch buffer.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      // First use: allocate the per-thread dispatch buffer array.
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size,
            "th_%d.th_dispatch.th_disp_buffer "
            "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      // Reuse: clear the existing buffers for the new team binding.
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  // Lazily allocate and zero the task-state memo stack (initial depth 4).
  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}

/* allocate a new thread for the requesting team.
   this is only called from
   within a forkjoin critical section. we will first try to get an available
   thread from the thread pool. if none is available, we will fork a new one
   assuming we are able to create a new one. this should be assured, as the
   caller should check on this first. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  // Without nested hot teams only the primary thread may allocate workers.
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {
    // Pop the head of the (singly linked) thread pool list.
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    // The insert point cursor must not dangle on the thread we just removed.
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    // th_active_in_pool is read/written under the thread's suspend mutex.
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    // Reset task-state bookkeeping for reuse in the new team.
    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread.  We try to do this as early as possible.
  // Check-then-lock-then-recheck so only one thread creates the monitor.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // The reason is that if the library is loaded/unloaded in a loop with
      // small (parallel) work in between, then there is high probability that
      // monitor thread started after the library shutdown. At shutdown it is
      // too late to cope with the problem, because when the primary thread is
      // in DllMain (process detach) the monitor has no chances to start (it is
      // blocked), and primary thread has no means to inform the monitor that
      // the library has gone, because all the memory which the monitor can
      // access is going to be released/reset.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();

  {
    // Scan for the first free gtid slot. Hidden helper threads occupy the
    // low gtids (1..__kmp_hidden_helper_threads_num), so regular threads
    // start searching after them unless we are currently initializing the
    // hidden helpers themselves.
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  // Publish the new descriptor in the global thread table.
  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race conditions detection on synchronization flags in debug mode
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  // TODO: check if we need to also suppress b_arrived flags
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  // Put every barrier slot into its initial (not waiting) state.
  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  // Place partition is assigned later (see __kmp_partition_places).
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
46150b57cec5SDimitry Andric The hot team code calls this case at every fork barrier, so EPCC barrier 46160b57cec5SDimitry Andric test are extremely sensitive to changes in it, esp. writes to the team 46170b57cec5SDimitry Andric struct, which cause a cache invalidation in all threads. 46180b57cec5SDimitry Andric IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ 46190b57cec5SDimitry Andric static void __kmp_reinitialize_team(kmp_team_t *team, 46200b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 46210b57cec5SDimitry Andric ident_t *loc) { 46220b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 46230b57cec5SDimitry Andric team->t.t_threads[0], team)); 46240b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team && new_icvs); 46250b57cec5SDimitry Andric KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); 46260b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 46270b57cec5SDimitry Andric 46280b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4629fe6060f1SDimitry Andric // Copy ICVs to the primary thread's implicit taskdata 46300b57cec5SDimitry Andric __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); 46310b57cec5SDimitry Andric copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 46320b57cec5SDimitry Andric 46330b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 46340b57cec5SDimitry Andric team->t.t_threads[0], team)); 46350b57cec5SDimitry Andric } 46360b57cec5SDimitry Andric 46370b57cec5SDimitry Andric /* Initialize the team data structure. 46380b57cec5SDimitry Andric This assumes the t_threads and t_max_nproc are already set. 
   Also, we don't touch the arguments */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;  not needed */
  // A team of one executes serialized.
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;
  /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
   * up hot team */

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  // Delegate ICV propagation and implicit-task setup to the reuse path.
  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Sets full mask for thread and returns old mask, no changes to structures.
*/ 46950b57cec5SDimitry Andric static void 46960b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 46970b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 46980b57cec5SDimitry Andric int status; 46990b57cec5SDimitry Andric if (old_mask != NULL) { 47000b57cec5SDimitry Andric status = __kmp_get_system_affinity(old_mask, TRUE); 47010b57cec5SDimitry Andric int error = errno; 47020b57cec5SDimitry Andric if (status != 0) { 47030b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), 47040b57cec5SDimitry Andric __kmp_msg_null); 47050b57cec5SDimitry Andric } 47060b57cec5SDimitry Andric } 47070b57cec5SDimitry Andric __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 47080b57cec5SDimitry Andric } 47090b57cec5SDimitry Andric } 47100b57cec5SDimitry Andric #endif 47110b57cec5SDimitry Andric 47120b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 47130b57cec5SDimitry Andric 47140b57cec5SDimitry Andric // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4715fe6060f1SDimitry Andric // It calculates the worker + primary thread's partition based upon the parent 47160b57cec5SDimitry Andric // thread's partition, and binds each worker to a thread in their partition. 4717fe6060f1SDimitry Andric // The primary thread's partition should already include its current binding. 
47180b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4719fe6060f1SDimitry Andric // Do not partition places for the hidden helper team 4720fe6060f1SDimitry Andric if (KMP_HIDDEN_HELPER_TEAM(team)) 4721fe6060f1SDimitry Andric return; 4722fe6060f1SDimitry Andric // Copy the primary thread's place partition to the team struct 47230b57cec5SDimitry Andric kmp_info_t *master_th = team->t.t_threads[0]; 47240b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th != NULL); 47250b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 47260b57cec5SDimitry Andric int first_place = master_th->th.th_first_place; 47270b57cec5SDimitry Andric int last_place = master_th->th.th_last_place; 47280b57cec5SDimitry Andric int masters_place = master_th->th.th_current_place; 47290b57cec5SDimitry Andric team->t.t_first_place = first_place; 47300b57cec5SDimitry Andric team->t.t_last_place = last_place; 47310b57cec5SDimitry Andric 47320b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 47330b57cec5SDimitry Andric "bound to place %d partition = [%d,%d]\n", 47340b57cec5SDimitry Andric proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 47350b57cec5SDimitry Andric team->t.t_id, masters_place, first_place, last_place)); 47360b57cec5SDimitry Andric 47370b57cec5SDimitry Andric switch (proc_bind) { 47380b57cec5SDimitry Andric 47390b57cec5SDimitry Andric case proc_bind_default: 4740fe6060f1SDimitry Andric // Serial teams might have the proc_bind policy set to proc_bind_default. 4741fe6060f1SDimitry Andric // Not an issue -- we don't rebind primary thread for any proc_bind policy. 
47420b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 47430b57cec5SDimitry Andric break; 47440b57cec5SDimitry Andric 4745fe6060f1SDimitry Andric case proc_bind_primary: { 47460b57cec5SDimitry Andric int f; 47470b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47480b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 47490b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47510b57cec5SDimitry Andric th->th.th_first_place = first_place; 47520b57cec5SDimitry Andric th->th.th_last_place = last_place; 47530b57cec5SDimitry Andric th->th.th_new_place = masters_place; 47540b57cec5SDimitry Andric if (__kmp_display_affinity && masters_place != th->th.th_current_place && 47550b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47560b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47570b57cec5SDimitry Andric } 47580b57cec5SDimitry Andric 4759fe6060f1SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d " 47600b57cec5SDimitry Andric "partition = [%d,%d]\n", 47610b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 47620b57cec5SDimitry Andric f, masters_place, first_place, last_place)); 47630b57cec5SDimitry Andric } 47640b57cec5SDimitry Andric } break; 47650b57cec5SDimitry Andric 47660b57cec5SDimitry Andric case proc_bind_close: { 47670b57cec5SDimitry Andric int f; 47680b57cec5SDimitry Andric int n_th = team->t.t_nproc; 47690b57cec5SDimitry Andric int n_places; 47700b57cec5SDimitry Andric if (first_place <= last_place) { 47710b57cec5SDimitry Andric n_places = last_place - first_place + 1; 47720b57cec5SDimitry Andric } else { 47730b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 47740b57cec5SDimitry Andric } 47750b57cec5SDimitry Andric if (n_th <= n_places) { 47760b57cec5SDimitry Andric int place = masters_place; 47770b57cec5SDimitry Andric for (f = 1; f < n_th; f++) { 
47780b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 47790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 47800b57cec5SDimitry Andric 47810b57cec5SDimitry Andric if (place == last_place) { 47820b57cec5SDimitry Andric place = first_place; 47830b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 47840b57cec5SDimitry Andric place = 0; 47850b57cec5SDimitry Andric } else { 47860b57cec5SDimitry Andric place++; 47870b57cec5SDimitry Andric } 47880b57cec5SDimitry Andric th->th.th_first_place = first_place; 47890b57cec5SDimitry Andric th->th.th_last_place = last_place; 47900b57cec5SDimitry Andric th->th.th_new_place = place; 47910b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 47920b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 47930b57cec5SDimitry Andric team->t.t_display_affinity = 1; 47940b57cec5SDimitry Andric } 47950b57cec5SDimitry Andric 47960b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 47970b57cec5SDimitry Andric "partition = [%d,%d]\n", 47980b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 47990b57cec5SDimitry Andric team->t.t_id, f, place, first_place, last_place)); 48000b57cec5SDimitry Andric } 48010b57cec5SDimitry Andric } else { 48020b57cec5SDimitry Andric int S, rem, gap, s_count; 48030b57cec5SDimitry Andric S = n_th / n_places; 48040b57cec5SDimitry Andric s_count = 0; 48050b57cec5SDimitry Andric rem = n_th - (S * n_places); 48060b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 48070b57cec5SDimitry Andric int place = masters_place; 48080b57cec5SDimitry Andric int gap_ct = gap; 48090b57cec5SDimitry Andric for (f = 0; f < n_th; f++) { 48100b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48120b57cec5SDimitry Andric 48130b57cec5SDimitry Andric th->th.th_first_place = first_place; 48140b57cec5SDimitry Andric th->th.th_last_place = last_place; 48150b57cec5SDimitry Andric th->th.th_new_place = place; 48160b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48170b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48180b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48190b57cec5SDimitry Andric } 48200b57cec5SDimitry Andric s_count++; 48210b57cec5SDimitry Andric 48220b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 48230b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 48240b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 48250b57cec5SDimitry Andric // we added an extra thread to this place; move to next place 48260b57cec5SDimitry Andric if (place == last_place) { 48270b57cec5SDimitry Andric place = first_place; 48280b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48290b57cec5SDimitry Andric place = 0; 48300b57cec5SDimitry Andric } else { 48310b57cec5SDimitry Andric place++; 48320b57cec5SDimitry Andric } 48330b57cec5SDimitry Andric s_count = 0; 48340b57cec5SDimitry Andric gap_ct = 1; 48350b57cec5SDimitry Andric rem--; 48360b57cec5SDimitry Andric } else if (s_count == S) { // place full; don't add extra 48370b57cec5SDimitry Andric if (place == last_place) { 48380b57cec5SDimitry Andric place = first_place; 48390b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48400b57cec5SDimitry Andric place = 0; 48410b57cec5SDimitry Andric } else { 
48420b57cec5SDimitry Andric place++; 48430b57cec5SDimitry Andric } 48440b57cec5SDimitry Andric gap_ct++; 48450b57cec5SDimitry Andric s_count = 0; 48460b57cec5SDimitry Andric } 48470b57cec5SDimitry Andric 48480b57cec5SDimitry Andric KA_TRACE(100, 48490b57cec5SDimitry Andric ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 48500b57cec5SDimitry Andric "partition = [%d,%d]\n", 48510b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 48520b57cec5SDimitry Andric th->th.th_new_place, first_place, last_place)); 48530b57cec5SDimitry Andric } 48540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(place == masters_place); 48550b57cec5SDimitry Andric } 48560b57cec5SDimitry Andric } break; 48570b57cec5SDimitry Andric 48580b57cec5SDimitry Andric case proc_bind_spread: { 48590b57cec5SDimitry Andric int f; 48600b57cec5SDimitry Andric int n_th = team->t.t_nproc; 48610b57cec5SDimitry Andric int n_places; 48620b57cec5SDimitry Andric int thidx; 48630b57cec5SDimitry Andric if (first_place <= last_place) { 48640b57cec5SDimitry Andric n_places = last_place - first_place + 1; 48650b57cec5SDimitry Andric } else { 48660b57cec5SDimitry Andric n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 48670b57cec5SDimitry Andric } 48680b57cec5SDimitry Andric if (n_th <= n_places) { 48690b57cec5SDimitry Andric int place = -1; 48700b57cec5SDimitry Andric 48710b57cec5SDimitry Andric if (n_places != static_cast<int>(__kmp_affinity_num_masks)) { 48720b57cec5SDimitry Andric int S = n_places / n_th; 48730b57cec5SDimitry Andric int s_count, rem, gap, gap_ct; 48740b57cec5SDimitry Andric 48750b57cec5SDimitry Andric place = masters_place; 48760b57cec5SDimitry Andric rem = n_places - n_th * S; 48770b57cec5SDimitry Andric gap = rem ? 
n_th / rem : 1; 48780b57cec5SDimitry Andric gap_ct = gap; 48790b57cec5SDimitry Andric thidx = n_th; 48800b57cec5SDimitry Andric if (update_master_only == 1) 48810b57cec5SDimitry Andric thidx = 1; 48820b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 48830b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 48840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 48850b57cec5SDimitry Andric 48860b57cec5SDimitry Andric th->th.th_first_place = place; 48870b57cec5SDimitry Andric th->th.th_new_place = place; 48880b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 48890b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 48900b57cec5SDimitry Andric team->t.t_display_affinity = 1; 48910b57cec5SDimitry Andric } 48920b57cec5SDimitry Andric s_count = 1; 48930b57cec5SDimitry Andric while (s_count < S) { 48940b57cec5SDimitry Andric if (place == last_place) { 48950b57cec5SDimitry Andric place = first_place; 48960b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 48970b57cec5SDimitry Andric place = 0; 48980b57cec5SDimitry Andric } else { 48990b57cec5SDimitry Andric place++; 49000b57cec5SDimitry Andric } 49010b57cec5SDimitry Andric s_count++; 49020b57cec5SDimitry Andric } 49030b57cec5SDimitry Andric if (rem && (gap_ct == gap)) { 49040b57cec5SDimitry Andric if (place == last_place) { 49050b57cec5SDimitry Andric place = first_place; 49060b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49070b57cec5SDimitry Andric place = 0; 49080b57cec5SDimitry Andric } else { 49090b57cec5SDimitry Andric place++; 49100b57cec5SDimitry Andric } 49110b57cec5SDimitry Andric rem--; 49120b57cec5SDimitry Andric gap_ct = 0; 49130b57cec5SDimitry Andric } 49140b57cec5SDimitry Andric th->th.th_last_place = place; 49150b57cec5SDimitry Andric gap_ct++; 49160b57cec5SDimitry Andric 49170b57cec5SDimitry Andric if (place == last_place) { 49180b57cec5SDimitry Andric place = first_place; 
49190b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 49200b57cec5SDimitry Andric place = 0; 49210b57cec5SDimitry Andric } else { 49220b57cec5SDimitry Andric place++; 49230b57cec5SDimitry Andric } 49240b57cec5SDimitry Andric 49250b57cec5SDimitry Andric KA_TRACE(100, 49260b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49270b57cec5SDimitry Andric "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", 49280b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 49290b57cec5SDimitry Andric f, th->th.th_new_place, th->th.th_first_place, 49300b57cec5SDimitry Andric th->th.th_last_place, __kmp_affinity_num_masks)); 49310b57cec5SDimitry Andric } 49320b57cec5SDimitry Andric } else { 49330b57cec5SDimitry Andric /* Having uniform space of available computation places I can create 49340b57cec5SDimitry Andric T partitions of round(P/T) size and put threads into the first 49350b57cec5SDimitry Andric place of each partition. 
*/ 49360b57cec5SDimitry Andric double current = static_cast<double>(masters_place); 49370b57cec5SDimitry Andric double spacing = 49380b57cec5SDimitry Andric (static_cast<double>(n_places + 1) / static_cast<double>(n_th)); 49390b57cec5SDimitry Andric int first, last; 49400b57cec5SDimitry Andric kmp_info_t *th; 49410b57cec5SDimitry Andric 49420b57cec5SDimitry Andric thidx = n_th + 1; 49430b57cec5SDimitry Andric if (update_master_only == 1) 49440b57cec5SDimitry Andric thidx = 1; 49450b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 49460b57cec5SDimitry Andric first = static_cast<int>(current); 49470b57cec5SDimitry Andric last = static_cast<int>(current + spacing) - 1; 49480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last >= first); 49490b57cec5SDimitry Andric if (first >= n_places) { 49500b57cec5SDimitry Andric if (masters_place) { 49510b57cec5SDimitry Andric first -= n_places; 49520b57cec5SDimitry Andric last -= n_places; 49530b57cec5SDimitry Andric if (first == (masters_place + 1)) { 49540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49550b57cec5SDimitry Andric first--; 49560b57cec5SDimitry Andric } 49570b57cec5SDimitry Andric if (last == masters_place) { 49580b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == (n_th - 1)); 49590b57cec5SDimitry Andric last--; 49600b57cec5SDimitry Andric } 49610b57cec5SDimitry Andric } else { 49620b57cec5SDimitry Andric KMP_DEBUG_ASSERT(f == n_th); 49630b57cec5SDimitry Andric first = 0; 49640b57cec5SDimitry Andric last = 0; 49650b57cec5SDimitry Andric } 49660b57cec5SDimitry Andric } 49670b57cec5SDimitry Andric if (last >= n_places) { 49680b57cec5SDimitry Andric last = (n_places - 1); 49690b57cec5SDimitry Andric } 49700b57cec5SDimitry Andric place = first; 49710b57cec5SDimitry Andric current += spacing; 49720b57cec5SDimitry Andric if (f < n_th) { 49730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= first); 49740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > first); 49750b57cec5SDimitry Andric KMP_DEBUG_ASSERT(0 <= last); 
49760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(n_places > last); 49770b57cec5SDimitry Andric KMP_DEBUG_ASSERT(last_place >= first_place); 49780b57cec5SDimitry Andric th = team->t.t_threads[f]; 49790b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th); 49800b57cec5SDimitry Andric th->th.th_first_place = first; 49810b57cec5SDimitry Andric th->th.th_new_place = place; 49820b57cec5SDimitry Andric th->th.th_last_place = last; 49830b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 49840b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 49850b57cec5SDimitry Andric team->t.t_display_affinity = 1; 49860b57cec5SDimitry Andric } 49870b57cec5SDimitry Andric KA_TRACE(100, 49880b57cec5SDimitry Andric ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 49890b57cec5SDimitry Andric "partition = [%d,%d], spacing = %.4f\n", 49900b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 49910b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 49920b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place, spacing)); 49930b57cec5SDimitry Andric } 49940b57cec5SDimitry Andric } 49950b57cec5SDimitry Andric } 49960b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 49970b57cec5SDimitry Andric } else { 49980b57cec5SDimitry Andric int S, rem, gap, s_count; 49990b57cec5SDimitry Andric S = n_th / n_places; 50000b57cec5SDimitry Andric s_count = 0; 50010b57cec5SDimitry Andric rem = n_th - (S * n_places); 50020b57cec5SDimitry Andric gap = rem > 0 ? 
n_places / rem : n_places; 50030b57cec5SDimitry Andric int place = masters_place; 50040b57cec5SDimitry Andric int gap_ct = gap; 50050b57cec5SDimitry Andric thidx = n_th; 50060b57cec5SDimitry Andric if (update_master_only == 1) 50070b57cec5SDimitry Andric thidx = 1; 50080b57cec5SDimitry Andric for (f = 0; f < thidx; f++) { 50090b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 50100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th != NULL); 50110b57cec5SDimitry Andric 50120b57cec5SDimitry Andric th->th.th_first_place = place; 50130b57cec5SDimitry Andric th->th.th_last_place = place; 50140b57cec5SDimitry Andric th->th.th_new_place = place; 50150b57cec5SDimitry Andric if (__kmp_display_affinity && place != th->th.th_current_place && 50160b57cec5SDimitry Andric team->t.t_display_affinity != 1) { 50170b57cec5SDimitry Andric team->t.t_display_affinity = 1; 50180b57cec5SDimitry Andric } 50190b57cec5SDimitry Andric s_count++; 50200b57cec5SDimitry Andric 50210b57cec5SDimitry Andric if ((s_count == S) && rem && (gap_ct == gap)) { 50220b57cec5SDimitry Andric // do nothing, add an extra thread to place on next iteration 50230b57cec5SDimitry Andric } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 50240b57cec5SDimitry Andric // we added an extra thread to this place; move on to next place 50250b57cec5SDimitry Andric if (place == last_place) { 50260b57cec5SDimitry Andric place = first_place; 50270b57cec5SDimitry Andric } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 50280b57cec5SDimitry Andric place = 0; 50290b57cec5SDimitry Andric } else { 50300b57cec5SDimitry Andric place++; 50310b57cec5SDimitry Andric } 50320b57cec5SDimitry Andric s_count = 0; 50330b57cec5SDimitry Andric gap_ct = 1; 50340b57cec5SDimitry Andric rem--; 50350b57cec5SDimitry Andric } else if (s_count == S) { // place is full; don't add extra thread 50360b57cec5SDimitry Andric if (place == last_place) { 50370b57cec5SDimitry Andric place = first_place; 50380b57cec5SDimitry Andric } else if 
(place == (int)(__kmp_affinity_num_masks - 1)) { 50390b57cec5SDimitry Andric place = 0; 50400b57cec5SDimitry Andric } else { 50410b57cec5SDimitry Andric place++; 50420b57cec5SDimitry Andric } 50430b57cec5SDimitry Andric gap_ct++; 50440b57cec5SDimitry Andric s_count = 0; 50450b57cec5SDimitry Andric } 50460b57cec5SDimitry Andric 50470b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 50480b57cec5SDimitry Andric "partition = [%d,%d]\n", 50490b57cec5SDimitry Andric __kmp_gtid_from_thread(team->t.t_threads[f]), 50500b57cec5SDimitry Andric team->t.t_id, f, th->th.th_new_place, 50510b57cec5SDimitry Andric th->th.th_first_place, th->th.th_last_place)); 50520b57cec5SDimitry Andric } 50530b57cec5SDimitry Andric KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 50540b57cec5SDimitry Andric } 50550b57cec5SDimitry Andric } break; 50560b57cec5SDimitry Andric 50570b57cec5SDimitry Andric default: 50580b57cec5SDimitry Andric break; 50590b57cec5SDimitry Andric } 50600b57cec5SDimitry Andric 50610b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 50620b57cec5SDimitry Andric } 50630b57cec5SDimitry Andric 50640b57cec5SDimitry Andric #endif // KMP_AFFINITY_SUPPORTED 50650b57cec5SDimitry Andric 50660b57cec5SDimitry Andric /* allocate a new team data structure to use. 
   take one off of the free pool if
   available */

/* Allocate (or reuse) a team data structure for a parallel region.
 *
 * Resolution order:
 *   1. Reuse the "hot" team (root's, or the nested hot team for the current
 *      level when KMP_NESTED_HOT_TEAMS), resizing it up or down as needed.
 *   2. Otherwise, take a sufficiently large team from the global team pool,
 *      reaping undersized pool entries along the way.
 *   3. Otherwise, allocate and initialize a brand-new team.
 *
 * root          - root of the thread requesting the team
 * new_nproc     - number of threads the team must run with
 * max_nproc     - upper bound on team size (new_nproc <= max_nproc)
 * ompt_parallel_data - OMPT id to attach to the team (OMPT builds only)
 * new_proc_bind - proc-bind policy to install on the team
 * new_icvs      - internal control values to (re)initialize the team with
 * argc          - number of microtask arguments to reserve space for
 * master        - requesting primary thread (nested-hot-teams builds only)
 *
 * Returns a fully initialized kmp_team_t ready for fork. */
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  // Hot-team reuse is only considered when the root is not already active.
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    // Nesting level selects which hot team (if any) may be reused.
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          ( // #teams > 1
              team->t.t_pkfn ==
                  (microtask_t)__kmp_teams_master || // inner fork of the teams
              master->th.th_teams_level <
                  team->t.t_level)) { // or nested parallel inside the teams
        ++level; // not increment if #teams==1, or for outer fork of the teams;
        // increment otherwise
      }
      // Do not perform the place partition if inner fork of the teams
      // Wait until nested parallel region encountered inside teams construct
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // hot team has already been allocated for given level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    // check we won't access uninitialized hot_teams, just in case
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif
  // Optimization to use a "hot" team
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier may need a resize
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // If not doing the place partition, then reset the team's proc bind
    // to indicate that partitioning of all threads still needs to take place
    if (do_place_partition == 0)
      team->t.t_proc_bind = proc_bind_default;
    // Has the number of threads changed?
    /* Let's assume the most common case is that the number of threads is
       unchanged, and put that case first. */
    if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // This case can mean that omp_set_num_threads() was called and the hot
      // team size was already reduced, so we check the special flag
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      kmp_r_sched_t new_sched = new_icvs->sched;
      // set primary thread's schedule as new run-time schedule
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            // add flag to update only master for spread
            __kmp_partition_places(team, 1);
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      // Hot team is larger than requested: release or park the extras.
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already reduced earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // AC: saved number of threads should correspond to team's value in this
        // mode, can be bigger in mode 1, when hot team has threads in reserve
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          if (__kmp_tasking_mode != tskm_immediate_exec) {
            // When decreasing team size, threads no longer in the team should
            // unref task team.
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } // (__kmp_hot_teams_mode == 0)
      else {
        // When keeping extra threads in team, switch threads to wait on own
        // b_go flag
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS
      team->t.t_nproc = new_nproc;
      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      // Update remaining threads
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }

      // restore the current task state of the primary thread: should be the
      // implicit task
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } else { // team->t.t_nproc < new_nproc
      // Hot team is smaller than requested: draw from reserve and/or
      // allocate new worker threads.
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
      kmp_affin_mask_t *old_mask;
      if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif

      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use to update only
      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS
      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team
        // Other data will be set in __kmp_initialize_info() below.
        int b;
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // we have all needed threads in reserve, no need to allocate any
        // this only possible in mode 1, cannot have reserved threads in mode 0
        // NOTE(review): old_mask allocated above appears not to be freed on
        // this path (the KMP_CPU_FREE below sits in the else branch) — TODO
        // confirm against upstream whether this is an intentional no-op or a
        // leak.
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough;
        // get reserved threads involved if any.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
        if (team->t.t_max_nproc < new_nproc) {
          /* reallocate larger arrays */
          __kmp_reallocate_team_arrays(team, new_nproc);
          __kmp_reinitialize_team(team, new_icvs, NULL);
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        /* Temporarily set full mask for primary thread before creation of
           workers. The reason is that workers inherit the affinity from the
           primary thread, so if a lot of workers are created on the single
           core quickly, they don't get a chance to set their own affinity for
           a long time. */
        __kmp_set_thread_affinity_mask_full_tmp(old_mask);
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for new threads.
            int b;
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
            }
          }
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        if (KMP_AFFINITY_CAPABLE()) {
          /* Restore initial primary thread's affinity mask */
          __kmp_set_system_affinity(old_mask, TRUE);
          KMP_CPU_FREE(old_mask);
        }
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already increased earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* make sure everyone is synchronized */
      // new threads below
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));

      if (level) { // set th_task_state for new threads in nested hot team
        // __kmp_initialize_info() no longer zeroes th_task_state, so we should
        // only need to set the th_task_state for the new threads. th_task_state
        // for primary thread will not be accurate until after this in
        // __kmp_fork_call(), so we look to the primary thread's memo_stack to
        // get the correct value.
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state =
              team->t.t_threads[0]->th.th_task_state_memo_stack[level];
      } else { // set th_task_state for new threads in non-nested hot team
        // copy primary thread's state
        kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state = old_state;
      }

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } // Check changes in number of threads

    // Common tail for all three hot-team resize cases.
    kmp_info_t *master = team->t.t_threads[0];
    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams construct specific info to workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    if (level) {
      // Sync barrier state for nested hot teams, not needed for outermost hot
      // team.
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        int b;
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }
#endif // KMP_NESTED_HOT_TEAMS

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);
    // The hot team re-uses the previous task team,
    // if untouched during the previous release->gather phase.

    KF_TRACE(10, (" hot_team = %p\n", team));

#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

#if OMPT_SUPPORT
    __ompt_team_assign_id(team, ompt_parallel_data);
#endif

    KMP_MB();

    return team;
  }

  /* next, let's try to take one from the team pool */
  KMP_MB();
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    /* TODO: consider resizing undersized teams instead of reaping them, now
       that we have a resizing mechanism */
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) { // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
        }
      }

      /* setup the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
#endif
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

#if OMPT_SUPPORT
      __ompt_team_assign_id(team, ompt_parallel_data);
#endif

      KMP_MB();

      return team;
    }

    /* reap team if it is too small, then loop back and check the next one */
    // not sure if this is wise, but, will be redone during the hot-teams
    // rewrite.
    /* TODO: Use technique to find the right size hot-team, don't reap them */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }

  /* nothing available in the pool, no matter, make a new team! */
  KMP_MB();
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  /* NOTE well, for some reason allocating one big buffer and dividing it up
     seems to really hurt performance a lot on the P4, so, let's not use this */
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
  // memory, no need to duplicate
  team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
  // memory, no need to duplicate

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
#endif
    }
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KMP_MB();

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}

/* TODO implement hot-teams at all levels */
/* TODO implement lazy thread release on demand (disband request) */

/* free the team. return it to the team pool.
   release all the threads associated with it. */
// Returns a finished team to the team pool (or keeps it as a hot team).
// Phases for a non-hot team: (1) wait for workers to reach a reapable state,
// (2) detach and free task teams, (3) free worker threads back to the thread
// pool, (4) tear down the distributed barrier if in use, (5) push the team
// onto __kmp_team_pool. Hot teams skip all of this and only clean up CG
// roots created for a teams construct. Caller context: assumed to hold the
// forkjoin lock, consistent with the other team-lifecycle routines here.
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  // With nested hot teams, a team below __kmp_hot_teams_max_level is also
  // treated as "hot" and must not be disbanded. The level arithmetic below
  // compensates for levels not incremented inside a teams construct.
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
#if KMP_DEBUG
    // hot_teams is only consumed by the KMP_DEBUG_ASSERT below, hence the
    // debug-only declaration (avoids an unused-variable warning in release).
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
#endif
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library.
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state. Workers parked in a sleep
      // flag are woken so they can progress to KMP_SAFE_TO_REAP.
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // NOTE(review): 1 -> 2 appears to mark the thread as "leaving the
        // team" for the distributed barrier; the matching transition to 0 is
        // awaited below — confirm against the dist-barrier protocol.
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    // Only now is it safe to drop the t_threads[] references.
    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}

/* reap the team.
destroy it, reclaim all its resources and free its memory */ 57790b57cec5SDimitry Andric kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 57800b57cec5SDimitry Andric kmp_team_t *next_pool = team->t.t_next_pool; 57810b57cec5SDimitry Andric 57820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 57830b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 57840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 57850b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 57860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_argv); 57870b57cec5SDimitry Andric 57880b57cec5SDimitry Andric /* TODO clean the threads that are a part of this? */ 57890b57cec5SDimitry Andric 57900b57cec5SDimitry Andric /* free stuff */ 57910b57cec5SDimitry Andric __kmp_free_team_arrays(team); 57920b57cec5SDimitry Andric if (team->t.t_argv != &team->t.t_inline_argv[0]) 57930b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 57940b57cec5SDimitry Andric __kmp_free(team); 57950b57cec5SDimitry Andric 57960b57cec5SDimitry Andric KMP_MB(); 57970b57cec5SDimitry Andric return next_pool; 57980b57cec5SDimitry Andric } 57990b57cec5SDimitry Andric 58000b57cec5SDimitry Andric // Free the thread. Don't reap it, just place it on the pool of available 58010b57cec5SDimitry Andric // threads. 58020b57cec5SDimitry Andric // 58030b57cec5SDimitry Andric // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 58040b57cec5SDimitry Andric // binding for the affinity mechanism to be useful. 58050b57cec5SDimitry Andric // 58060b57cec5SDimitry Andric // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 58070b57cec5SDimitry Andric // However, we want to avoid a potential performance problem by always 58080b57cec5SDimitry Andric // scanning through the list to find the correct point at which to insert 58090b57cec5SDimitry Andric // the thread (potential N**2 behavior). 
// To do this we keep track of the
// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
// With single-level parallelism, threads will always be added to the tail
// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
// parallelism, all bets are off and we may need to scan through the entire
// free list.
//
// This change also has a potentially large performance benefit, for some
// applications. Previously, as threads were freed from the hot team, they
// would be placed back on the free list in inverse order. If the hot team
// grew back to it's original size, then the freed thread would be placed
// back on the hot team in reverse order. This could cause bad cache
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
// Now, for single-level parallelism, the OMP tid is always == gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  // TCW_PTR: ordered writes so concurrent readers never see a stale team/root
  // pairing for a pooled thread.
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  // Walk the thread's contention-group chain, dropping one reference per
  // node until this thread's own CG root (or a worker's detach point).
  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads
   * -> multiple threads can share the data and try to free the task at
   * __kmp_reap_thread at exit. This duplicate use of the task data can happen
   * with higher probability when hot team is disabled but can occurs even when
   * the hot team is enabled */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.
  //
  // In the absence of nested parallelism, the for loop will have 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  // Suspend mutex guards th_active/th_active_in_pool against the thread
  // itself transitioning state concurrently.
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}

/* ------------------------------------------------------------------------ */

// Main entry for pooled worker threads: loops on the fork barrier waiting
// for work until global shutdown (__kmp_global.g.g_done). Continues below.
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  /* void *stack_data;*/
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  // Announce this native thread to an attached OMPT tool (thread-begin
  // callback) and track its state for the tool across the work loop.
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    // Re-read the team pointer after waking: the primary thread assigns us
    // to a team before releasing the fork barrier.
    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}

/* ------------------------------------------------------------------------ */

// TLS destructor for the gtid key: runs when an OS thread holding a gtid
// exits; unwinds that thread's runtime state.
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  // NOTE(review): the extracted text showed ">id" here; reconstructed as
  // "&gtid", consistent with the out-parameter use of __kmp_type_convert.
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

// Shared-library unload hook (GCC/Clang destructor attribute): runs the
// atexit shutdown path when libomp is dlclosed or the process exits.
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  /* [Windows]
     josh: ideally, we want to completely shutdown the library in this atexit
     handler, but stat code that depends on thread specific data for gtid fails
     because that data becomes unavailable at some point during the shutdown, so
     we call __kmp_internal_end_thread instead. We should eventually remove the
     dependency on __kmp_get_specific_gtid in the stat code and use
     __kmp_internal_end_library to cleanly shutdown the library.

     // TODO: Can some of this comment about GVS be removed?
     I suspect that the offending stat code is executed when the calling thread
     tries to clean up a dead root thread's data structures, resulting in GVS
     code trying to close the GVS structures for that thread, but since the stat
     code uses __kmp_get_specific_gtid to get the gtid with the assumption that
     the calling thread is cleaning up itself instead of another thread, it get
     confused. This happens because allowing a thread to unregister and cleanup
     another thread is a recent modification for addressing an issue.
     Based on the current design (20050722), a thread may end up
     trying to unregister another thread only if thread death does not trigger
     the calling of __kmp_internal_end_thread. For Linux* OS, there is the
     thread specific data destructor function to detect thread death. For
     Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
     is nothing. Thus, the workaround is applicable only for Windows static
     stat library. */
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}

static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.
int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Distributed barrier: flip th_used_in_team 0 -> 3 before waking the
        // thread (3 presumably marks "being reaped" -- TODO confirm against
        // the dist-barrier state machine), then resume it with no flag.
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously.  If it was actively
    // spinning in the thread pool, decrement the global count.
    //
    // There is a small timing hole here - if the worker thread was just waking
    // up after sleeping in the pool, had reset it's th_active_in_pool flag but
    // not decremented the global counter __kmp_thread_pool_active_nth yet, then
    // the global counter might not get updated.
    //
    // Currently, this can only happen as the library is unloaded,
    // so there are no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  // Unpublish the thread from the global table before freeing its storage.
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();

} // __kmp_reap_thread

// Free all ITT hash-table entries that were allocated from thread th's heap;
// called while unregistering a root so the allocations do not leak.
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next =
bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}

// Core teardown: unregister the library, reap pooled workers/teams/monitor,
// and clear the global init flags. Callers hold __kmp_initz_lock and
// __kmp_forkjoin_lock (see __kmp_internal_end_library / _thread).
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies.  */
  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
  // dead roots
#endif

  // After this loop, i is the index of the first still-active root, or
  // __kmp_threads_capacity if none are active.
  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates. */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
    KMP_MB(); /* Flush all pending memory write invalidates. */

    // Need to check that monitor was initialized before reaping it. If we are
    // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then
    // __kmp_monitor will appear to contain valid data, but it is only valid in
    // the parent process, not the child.
    // New behavior (201008): instead of keying off of the flag
    // __kmp_init_parallel, the monitor thread creation is keyed off
    // of the new flag __kmp_init_monitor.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
    // No active roots remain: full teardown is safe.
/* TODO move this to cleanup code */
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        //                    KMP_ASSERT( ! KMP_UBER_GTID( i ) );         // AC:
        //                    there can be uber threads alive here
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads.
    // This is valid for now, but be careful if threads are reaped sooner.
    while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    // See note above: One of the possible fixes for CQ138434 / CQ140126
    //
    // FIXME: push both code fragments down and CSE them?
    //        push them into __kmp_cleanup() ?
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  } /* else !__kmp_global.t_active */
  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates. */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}

// Shut the library down on behalf of the whole process (e.g. from the DSO
// destructor or atexit). gtid_req < 0 means "caller's gtid unknown".
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort?
*/
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  /* find out who we are and what we should do */
  {
    // Fall back to thread-local storage when the caller did not pass a gtid.
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // Root is still inside a parallel region: mark abort and bail out;
        // the actual teardown will happen later.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler, if they
 * call exit() */
/* For now, skip the usual subsequent processing and just dump the debug buffer.
   TODO: do a thorough shutdown instead */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // added unregister library call here when we switch to shm linux
      // if we don't, it will leave lots of files in /dev/shm
      // cleanup shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();

} // __kmp_internal_end_library

// Per-thread termination entry point (TLS destructor / worker exit).
// Unlike __kmp_internal_end_library, this only tears the runtime down when
// no sibling uber threads remain.
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
   * only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    // NOTE: gtid is scoped to this block; code after it must use gtid_req.
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // Root still running a parallel region: flag abort and defer teardown.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: lets not shutdown the dynamic library at the exit of uber thread,
  // because we will better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */

  /* should we finish the run-time? are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  // If any uber (root) thread is still registered, leave the runtime alive
  // for it and bail out.
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
} // __kmp_internal_end_thread

// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
66660b57cec5SDimitry Andric 66670b57cec5SDimitry Andric static inline char *__kmp_reg_status_name() { 66680b57cec5SDimitry Andric /* On RHEL 3u5 if linked statically, getpid() returns different values in 66690b57cec5SDimitry Andric each thread. If registration and unregistration go in different threads 66700b57cec5SDimitry Andric (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env 66710b57cec5SDimitry Andric env var can not be found, because the name will contain different pid. */ 6672e8d8bef9SDimitry Andric // macOS* complains about name being too long with additional getuid() 6673e8d8bef9SDimitry Andric #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB 6674e8d8bef9SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(), 6675e8d8bef9SDimitry Andric (int)getuid()); 6676e8d8bef9SDimitry Andric #else 66770b57cec5SDimitry Andric return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); 6678e8d8bef9SDimitry Andric #endif 66790b57cec5SDimitry Andric } // __kmp_reg_status_get 66800b57cec5SDimitry Andric 66810b57cec5SDimitry Andric void __kmp_register_library_startup(void) { 66820b57cec5SDimitry Andric 66830b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); // Name of the environment variable. 
66840b57cec5SDimitry Andric int done = 0; 66850b57cec5SDimitry Andric union { 66860b57cec5SDimitry Andric double dtime; 66870b57cec5SDimitry Andric long ltime; 66880b57cec5SDimitry Andric } time; 66890b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 66900b57cec5SDimitry Andric __kmp_initialize_system_tick(); 66910b57cec5SDimitry Andric #endif 66920b57cec5SDimitry Andric __kmp_read_system_time(&time.dtime); 66930b57cec5SDimitry Andric __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); 66940b57cec5SDimitry Andric __kmp_registration_str = 66950b57cec5SDimitry Andric __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, 66960b57cec5SDimitry Andric __kmp_registration_flag, KMP_LIBRARY_FILE); 66970b57cec5SDimitry Andric 66980b57cec5SDimitry Andric KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, 66990b57cec5SDimitry Andric __kmp_registration_str)); 67000b57cec5SDimitry Andric 67010b57cec5SDimitry Andric while (!done) { 67020b57cec5SDimitry Andric 67030b57cec5SDimitry Andric char *value = NULL; // Actual value of the environment variable. 67040b57cec5SDimitry Andric 6705349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6706e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6707e8d8bef9SDimitry Andric int shm_preexist = 0; 6708e8d8bef9SDimitry Andric char *data1; 6709e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666); 6710e8d8bef9SDimitry Andric if ((fd1 == -1) && (errno == EEXIST)) { 6711e8d8bef9SDimitry Andric // file didn't open because it already exists. 
6712e8d8bef9SDimitry Andric // try opening existing file 6713e8d8bef9SDimitry Andric fd1 = shm_open(shm_name, O_RDWR, 0666); 6714e8d8bef9SDimitry Andric if (fd1 == -1) { // file didn't open 6715e8d8bef9SDimitry Andric // error out here 6716e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0), 6717e8d8bef9SDimitry Andric __kmp_msg_null); 6718e8d8bef9SDimitry Andric } else { 6719e8d8bef9SDimitry Andric // able to open existing file 6720e8d8bef9SDimitry Andric shm_preexist = 1; 6721e8d8bef9SDimitry Andric } 6722e8d8bef9SDimitry Andric } else if (fd1 == -1) { // SHM didn't open; it was due to error other than 6723e8d8bef9SDimitry Andric // already exists. 6724e8d8bef9SDimitry Andric // error out here. 6725e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno), 6726e8d8bef9SDimitry Andric __kmp_msg_null); 6727e8d8bef9SDimitry Andric } 6728e8d8bef9SDimitry Andric if (shm_preexist == 0) { 6729e8d8bef9SDimitry Andric // we created SHM now set size 6730e8d8bef9SDimitry Andric if (ftruncate(fd1, SHM_SIZE) == -1) { 6731e8d8bef9SDimitry Andric // error occured setting size; 6732e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"), 6733e8d8bef9SDimitry Andric KMP_ERR(errno), __kmp_msg_null); 6734e8d8bef9SDimitry Andric } 6735e8d8bef9SDimitry Andric } 6736e8d8bef9SDimitry Andric data1 = 6737e8d8bef9SDimitry Andric (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0); 6738e8d8bef9SDimitry Andric if (data1 == MAP_FAILED) { 6739e8d8bef9SDimitry Andric // failed to map shared memory 6740e8d8bef9SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno), 6741e8d8bef9SDimitry Andric __kmp_msg_null); 6742e8d8bef9SDimitry Andric } 6743e8d8bef9SDimitry Andric if (shm_preexist == 0) { // set data to SHM, set value 6744e8d8bef9SDimitry Andric KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str); 6745e8d8bef9SDimitry Andric } 
6746e8d8bef9SDimitry Andric // Read value from either what we just wrote or existing file. 6747e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6748e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6749e8d8bef9SDimitry Andric close(fd1); 6750e8d8bef9SDimitry Andric #else // Windows and unix with static library 67510b57cec5SDimitry Andric // Set environment variable, but do not overwrite if it is exist. 67520b57cec5SDimitry Andric __kmp_env_set(name, __kmp_registration_str, 0); 6753e8d8bef9SDimitry Andric // read value to see if it got set 67540b57cec5SDimitry Andric value = __kmp_env_get(name); 6755e8d8bef9SDimitry Andric #endif 6756e8d8bef9SDimitry Andric 67570b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 67580b57cec5SDimitry Andric done = 1; // Ok, environment variable set successfully, exit the loop. 67590b57cec5SDimitry Andric } else { 67600b57cec5SDimitry Andric // Oops. Write failed. Another copy of OpenMP RTL is in memory. 67610b57cec5SDimitry Andric // Check whether it alive or dead. 67620b57cec5SDimitry Andric int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 
67630b57cec5SDimitry Andric char *tail = value; 67640b57cec5SDimitry Andric char *flag_addr_str = NULL; 67650b57cec5SDimitry Andric char *flag_val_str = NULL; 67660b57cec5SDimitry Andric char const *file_name = NULL; 67670b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_addr_str, &tail); 67680b57cec5SDimitry Andric __kmp_str_split(tail, '-', &flag_val_str, &tail); 67690b57cec5SDimitry Andric file_name = tail; 67700b57cec5SDimitry Andric if (tail != NULL) { 6771fe6060f1SDimitry Andric unsigned long *flag_addr = 0; 6772fe6060f1SDimitry Andric unsigned long flag_val = 0; 67730b57cec5SDimitry Andric KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr)); 67740b57cec5SDimitry Andric KMP_SSCANF(flag_val_str, "%lx", &flag_val); 67750b57cec5SDimitry Andric if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 67760b57cec5SDimitry Andric // First, check whether environment-encoded address is mapped into 67770b57cec5SDimitry Andric // addr space. 67780b57cec5SDimitry Andric // If so, dereference it to see if it still has the right value. 67790b57cec5SDimitry Andric if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 67800b57cec5SDimitry Andric neighbor = 1; 67810b57cec5SDimitry Andric } else { 67820b57cec5SDimitry Andric // If not, then we know the other copy of the library is no longer 67830b57cec5SDimitry Andric // running. 67840b57cec5SDimitry Andric neighbor = 2; 67850b57cec5SDimitry Andric } 67860b57cec5SDimitry Andric } 67870b57cec5SDimitry Andric } 67880b57cec5SDimitry Andric switch (neighbor) { 67890b57cec5SDimitry Andric case 0: // Cannot parse environment variable -- neighbor status unknown. 67900b57cec5SDimitry Andric // Assume it is the incompatible format of future version of the 67910b57cec5SDimitry Andric // library. Assume the other library is alive. 67920b57cec5SDimitry Andric // WARN( ... ); // TODO: Issue a warning. 
67930b57cec5SDimitry Andric file_name = "unknown library"; 67940b57cec5SDimitry Andric KMP_FALLTHROUGH(); 67950b57cec5SDimitry Andric // Attention! Falling to the next case. That's intentional. 67960b57cec5SDimitry Andric case 1: { // Neighbor is alive. 67970b57cec5SDimitry Andric // Check it is allowed. 67980b57cec5SDimitry Andric char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 67990b57cec5SDimitry Andric if (!__kmp_str_match_true(duplicate_ok)) { 68000b57cec5SDimitry Andric // That's not allowed. Issue fatal error. 68010b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 68020b57cec5SDimitry Andric KMP_HNT(DuplicateLibrary), __kmp_msg_null); 68030b57cec5SDimitry Andric } 68040b57cec5SDimitry Andric KMP_INTERNAL_FREE(duplicate_ok); 68050b57cec5SDimitry Andric __kmp_duplicate_library_ok = 1; 68060b57cec5SDimitry Andric done = 1; // Exit the loop. 68070b57cec5SDimitry Andric } break; 68080b57cec5SDimitry Andric case 2: { // Neighbor is dead. 6809e8d8bef9SDimitry Andric 6810349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6811e8d8bef9SDimitry Andric // close shared memory. 6812e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6813e8d8bef9SDimitry Andric #else 68140b57cec5SDimitry Andric // Clear the variable and try to register library again. 
68150b57cec5SDimitry Andric __kmp_env_unset(name); 6816e8d8bef9SDimitry Andric #endif 68170b57cec5SDimitry Andric } break; 6818fe6060f1SDimitry Andric default: { 6819fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(0); 6820fe6060f1SDimitry Andric } break; 68210b57cec5SDimitry Andric } 68220b57cec5SDimitry Andric } 68230b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)value); 6824349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6825e8d8bef9SDimitry Andric KMP_INTERNAL_FREE((void *)shm_name); 6826e8d8bef9SDimitry Andric #endif 6827e8d8bef9SDimitry Andric } // while 68280b57cec5SDimitry Andric KMP_INTERNAL_FREE((void *)name); 68290b57cec5SDimitry Andric 68300b57cec5SDimitry Andric } // func __kmp_register_library_startup 68310b57cec5SDimitry Andric 68320b57cec5SDimitry Andric void __kmp_unregister_library(void) { 68330b57cec5SDimitry Andric 68340b57cec5SDimitry Andric char *name = __kmp_reg_status_name(); 6835e8d8bef9SDimitry Andric char *value = NULL; 6836e8d8bef9SDimitry Andric 6837349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6838e8d8bef9SDimitry Andric char *shm_name = __kmp_str_format("/%s", name); 6839e8d8bef9SDimitry Andric int fd1 = shm_open(shm_name, O_RDONLY, 0666); 6840e8d8bef9SDimitry Andric if (fd1 == -1) { 6841e8d8bef9SDimitry Andric // file did not open. return. 
6842e8d8bef9SDimitry Andric return; 6843e8d8bef9SDimitry Andric } 6844e8d8bef9SDimitry Andric char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0); 6845e8d8bef9SDimitry Andric if (data1 != MAP_FAILED) { 6846e8d8bef9SDimitry Andric value = __kmp_str_format("%s", data1); // read value from SHM 6847e8d8bef9SDimitry Andric munmap(data1, SHM_SIZE); 6848e8d8bef9SDimitry Andric } 6849e8d8bef9SDimitry Andric close(fd1); 6850e8d8bef9SDimitry Andric #else 6851e8d8bef9SDimitry Andric value = __kmp_env_get(name); 6852e8d8bef9SDimitry Andric #endif 68530b57cec5SDimitry Andric 68540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 68550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 68560b57cec5SDimitry Andric if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 68570b57cec5SDimitry Andric // Ok, this is our variable. Delete it. 6858349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6859e8d8bef9SDimitry Andric shm_unlink(shm_name); // this removes file in /dev/shm 6860e8d8bef9SDimitry Andric #else 68610b57cec5SDimitry Andric __kmp_env_unset(name); 6862e8d8bef9SDimitry Andric #endif 68630b57cec5SDimitry Andric } 68640b57cec5SDimitry Andric 6865349cc55cSDimitry Andric #if defined(KMP_USE_SHM) 6866e8d8bef9SDimitry Andric KMP_INTERNAL_FREE(shm_name); 6867e8d8bef9SDimitry Andric #endif 6868e8d8bef9SDimitry Andric 68690b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_registration_str); 68700b57cec5SDimitry Andric KMP_INTERNAL_FREE(value); 68710b57cec5SDimitry Andric KMP_INTERNAL_FREE(name); 68720b57cec5SDimitry Andric 68730b57cec5SDimitry Andric __kmp_registration_flag = 0; 68740b57cec5SDimitry Andric __kmp_registration_str = NULL; 68750b57cec5SDimitry Andric 68760b57cec5SDimitry Andric } // __kmp_unregister_library 68770b57cec5SDimitry Andric 68780b57cec5SDimitry Andric // End of Library registration stuff. 
68790b57cec5SDimitry Andric // ----------------------------------------------------------------------------- 68800b57cec5SDimitry Andric 68810b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 68820b57cec5SDimitry Andric 68830b57cec5SDimitry Andric static void __kmp_check_mic_type() { 68840b57cec5SDimitry Andric kmp_cpuid_t cpuid_state = {0}; 68850b57cec5SDimitry Andric kmp_cpuid_t *cs_p = &cpuid_state; 68860b57cec5SDimitry Andric __kmp_x86_cpuid(1, 0, cs_p); 68870b57cec5SDimitry Andric // We don't support mic1 at the moment 68880b57cec5SDimitry Andric if ((cs_p->eax & 0xff0) == 0xB10) { 68890b57cec5SDimitry Andric __kmp_mic_type = mic2; 68900b57cec5SDimitry Andric } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { 68910b57cec5SDimitry Andric __kmp_mic_type = mic3; 68920b57cec5SDimitry Andric } else { 68930b57cec5SDimitry Andric __kmp_mic_type = non_mic; 68940b57cec5SDimitry Andric } 68950b57cec5SDimitry Andric } 68960b57cec5SDimitry Andric 68970b57cec5SDimitry Andric #endif /* KMP_MIC_SUPPORTED */ 68980b57cec5SDimitry Andric 6899e8d8bef9SDimitry Andric #if KMP_HAVE_UMWAIT 6900e8d8bef9SDimitry Andric static void __kmp_user_level_mwait_init() { 6901e8d8bef9SDimitry Andric struct kmp_cpuid buf; 6902e8d8bef9SDimitry Andric __kmp_x86_cpuid(7, 0, &buf); 690304eeddc0SDimitry Andric __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1); 690404eeddc0SDimitry Andric __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait; 690504eeddc0SDimitry Andric __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0); 6906e8d8bef9SDimitry Andric KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n", 6907e8d8bef9SDimitry Andric __kmp_umwait_enabled)); 6908e8d8bef9SDimitry Andric } 6909e8d8bef9SDimitry Andric #elif KMP_HAVE_MWAIT 6910e8d8bef9SDimitry Andric #ifndef AT_INTELPHIUSERMWAIT 6911e8d8bef9SDimitry Andric // Spurious, non-existent value that should always fail to return anything. 
6912e8d8bef9SDimitry Andric // Will be replaced with the correct value when we know that. 6913e8d8bef9SDimitry Andric #define AT_INTELPHIUSERMWAIT 10000 6914e8d8bef9SDimitry Andric #endif 6915e8d8bef9SDimitry Andric // getauxval() function is available in RHEL7 and SLES12. If a system with an 6916e8d8bef9SDimitry Andric // earlier OS is used to build the RTL, we'll use the following internal 6917e8d8bef9SDimitry Andric // function when the entry is not found. 6918e8d8bef9SDimitry Andric unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL; 6919e8d8bef9SDimitry Andric unsigned long getauxval(unsigned long) { return 0; } 6920e8d8bef9SDimitry Andric 6921e8d8bef9SDimitry Andric static void __kmp_user_level_mwait_init() { 6922e8d8bef9SDimitry Andric // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available 6923e8d8bef9SDimitry Andric // use them to find if the user-level mwait is enabled. Otherwise, forcibly 6924e8d8bef9SDimitry Andric // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable 6925e8d8bef9SDimitry Andric // KMP_USER_LEVEL_MWAIT was set to TRUE. 
6926e8d8bef9SDimitry Andric if (__kmp_mic_type == mic3) { 6927e8d8bef9SDimitry Andric unsigned long res = getauxval(AT_INTELPHIUSERMWAIT); 6928e8d8bef9SDimitry Andric if ((res & 0x1) || __kmp_user_level_mwait) { 6929e8d8bef9SDimitry Andric __kmp_mwait_enabled = TRUE; 6930e8d8bef9SDimitry Andric if (__kmp_user_level_mwait) { 6931e8d8bef9SDimitry Andric KMP_INFORM(EnvMwaitWarn); 6932e8d8bef9SDimitry Andric } 6933e8d8bef9SDimitry Andric } else { 6934e8d8bef9SDimitry Andric __kmp_mwait_enabled = FALSE; 6935e8d8bef9SDimitry Andric } 6936e8d8bef9SDimitry Andric } 6937e8d8bef9SDimitry Andric KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, " 6938e8d8bef9SDimitry Andric "__kmp_mwait_enabled = %d\n", 6939e8d8bef9SDimitry Andric __kmp_mic_type, __kmp_mwait_enabled)); 6940e8d8bef9SDimitry Andric } 6941e8d8bef9SDimitry Andric #endif /* KMP_HAVE_UMWAIT */ 6942e8d8bef9SDimitry Andric 69430b57cec5SDimitry Andric static void __kmp_do_serial_initialize(void) { 69440b57cec5SDimitry Andric int i, gtid; 6945e8d8bef9SDimitry Andric size_t size; 69460b57cec5SDimitry Andric 69470b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n")); 69480b57cec5SDimitry Andric 69490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4); 69500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4); 69510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8); 69520b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8); 69530b57cec5SDimitry Andric KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *)); 69540b57cec5SDimitry Andric 69550b57cec5SDimitry Andric #if OMPT_SUPPORT 69560b57cec5SDimitry Andric ompt_pre_init(); 69570b57cec5SDimitry Andric #endif 6958fe6060f1SDimitry Andric #if OMPD_SUPPORT 6959fe6060f1SDimitry Andric __kmp_env_dump(); 6960fe6060f1SDimitry Andric ompd_init(); 6961fe6060f1SDimitry Andric #endif 69620b57cec5SDimitry Andric 69630b57cec5SDimitry Andric __kmp_validate_locks(); 69640b57cec5SDimitry Andric 
69650b57cec5SDimitry Andric /* Initialize internal memory allocator */ 69660b57cec5SDimitry Andric __kmp_init_allocator(); 69670b57cec5SDimitry Andric 69680b57cec5SDimitry Andric /* Register the library startup via an environment variable and check to see 69690b57cec5SDimitry Andric whether another copy of the library is already registered. */ 69700b57cec5SDimitry Andric 69710b57cec5SDimitry Andric __kmp_register_library_startup(); 69720b57cec5SDimitry Andric 69730b57cec5SDimitry Andric /* TODO reinitialization of library */ 69740b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 69750b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); 69760b57cec5SDimitry Andric } 69770b57cec5SDimitry Andric 69780b57cec5SDimitry Andric __kmp_global.g.g_abort = 0; 69790b57cec5SDimitry Andric TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 69800b57cec5SDimitry Andric 69810b57cec5SDimitry Andric /* initialize the locks */ 69820b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 69830b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 69840b57cec5SDimitry Andric __kmp_init_speculative_stats(); 69850b57cec5SDimitry Andric #endif 69860b57cec5SDimitry Andric #endif 69870b57cec5SDimitry Andric #if KMP_STATS_ENABLED 69880b57cec5SDimitry Andric __kmp_stats_init(); 69890b57cec5SDimitry Andric #endif 69900b57cec5SDimitry Andric __kmp_init_lock(&__kmp_global_lock); 69910b57cec5SDimitry Andric __kmp_init_queuing_lock(&__kmp_dispatch_lock); 69920b57cec5SDimitry Andric __kmp_init_lock(&__kmp_debug_lock); 69930b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock); 69940b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); 69950b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); 69960b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); 69970b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); 69980b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); 
69990b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); 70000b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); 70010b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); 70020b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); 70030b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); 70040b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); 70050b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); 70060b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); 70070b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_exit_lock); 70080b57cec5SDimitry Andric #if KMP_USE_MONITOR 70090b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_monitor_lock); 70100b57cec5SDimitry Andric #endif 70110b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); 70120b57cec5SDimitry Andric 70130b57cec5SDimitry Andric /* conduct initialization and initial setup of configuration */ 70140b57cec5SDimitry Andric 70150b57cec5SDimitry Andric __kmp_runtime_initialize(); 70160b57cec5SDimitry Andric 70170b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 70180b57cec5SDimitry Andric __kmp_check_mic_type(); 70190b57cec5SDimitry Andric #endif 70200b57cec5SDimitry Andric 70210b57cec5SDimitry Andric // Some global variable initialization moved here from kmp_env_initialize() 70220b57cec5SDimitry Andric #ifdef KMP_DEBUG 70230b57cec5SDimitry Andric kmp_diag = 0; 70240b57cec5SDimitry Andric #endif 70250b57cec5SDimitry Andric __kmp_abort_delay = 0; 70260b57cec5SDimitry Andric 70270b57cec5SDimitry Andric // From __kmp_init_dflt_team_nth() 70280b57cec5SDimitry Andric /* assume the entire machine will be used */ 70290b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_xproc; 70300b57cec5SDimitry Andric if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { 70310b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = KMP_MIN_NTH; 70320b57cec5SDimitry Andric } 
70330b57cec5SDimitry Andric if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { 70340b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 70350b57cec5SDimitry Andric } 70360b57cec5SDimitry Andric __kmp_max_nth = __kmp_sys_max_nth; 70370b57cec5SDimitry Andric __kmp_cg_max_nth = __kmp_sys_max_nth; 70380b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default 70390b57cec5SDimitry Andric if (__kmp_teams_max_nth > __kmp_sys_max_nth) { 70400b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_sys_max_nth; 70410b57cec5SDimitry Andric } 70420b57cec5SDimitry Andric 70430b57cec5SDimitry Andric // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" 70440b57cec5SDimitry Andric // part 70450b57cec5SDimitry Andric __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 70460b57cec5SDimitry Andric #if KMP_USE_MONITOR 70470b57cec5SDimitry Andric __kmp_monitor_wakeups = 70480b57cec5SDimitry Andric KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 70490b57cec5SDimitry Andric __kmp_bt_intervals = 70500b57cec5SDimitry Andric KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 70510b57cec5SDimitry Andric #endif 70520b57cec5SDimitry Andric // From "KMP_LIBRARY" part of __kmp_env_initialize() 70530b57cec5SDimitry Andric __kmp_library = library_throughput; 70540b57cec5SDimitry Andric // From KMP_SCHEDULE initialization 70550b57cec5SDimitry Andric __kmp_static = kmp_sch_static_balanced; 70560b57cec5SDimitry Andric // AC: do not use analytical here, because it is non-monotonous 70570b57cec5SDimitry Andric //__kmp_guided = kmp_sch_guided_iterative_chunked; 70580b57cec5SDimitry Andric //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no 70590b57cec5SDimitry Andric // need to repeat assignment 70600b57cec5SDimitry Andric // Barrier initialization. 
Moved here from __kmp_env_initialize() Barrier branch 70610b57cec5SDimitry Andric // bit control and barrier method control parts 70620b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 70630b57cec5SDimitry Andric #define kmp_reduction_barrier_gather_bb ((int)1) 70640b57cec5SDimitry Andric #define kmp_reduction_barrier_release_bb ((int)1) 7065349cc55cSDimitry Andric #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt 7066349cc55cSDimitry Andric #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt 70670b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 70680b57cec5SDimitry Andric for (i = bs_plain_barrier; i < bs_last_barrier; i++) { 70690b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; 70700b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; 70710b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; 70720b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; 70730b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 70740b57cec5SDimitry Andric if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( 70750b57cec5SDimitry Andric // lin_64 ): hyper,1 70760b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; 70770b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; 70780b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; 70790b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat; 70800b57cec5SDimitry Andric } 70810b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 70820b57cec5SDimitry Andric } 70830b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 70840b57cec5SDimitry Andric #undef kmp_reduction_barrier_release_pat 70850b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_pat 
70860b57cec5SDimitry Andric #undef kmp_reduction_barrier_release_bb 70870b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_bb 70880b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 70890b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 70900b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 70910b57cec5SDimitry Andric // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 70920b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather 70930b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = 70940b57cec5SDimitry Andric 1; // forkjoin release 70950b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 70960b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 70970b57cec5SDimitry Andric } 70980b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 70990b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 71000b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 71010b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 71020b57cec5SDimitry Andric } 71030b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 71040b57cec5SDimitry Andric #endif // KMP_MIC_SUPPORTED 71050b57cec5SDimitry Andric 71060b57cec5SDimitry Andric // From KMP_CHECKS initialization 71070b57cec5SDimitry Andric #ifdef KMP_DEBUG 71080b57cec5SDimitry Andric __kmp_env_checks = TRUE; /* development versions have the extra checks */ 71090b57cec5SDimitry Andric #else 71100b57cec5SDimitry Andric __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ 71110b57cec5SDimitry Andric #endif 71120b57cec5SDimitry Andric 71130b57cec5SDimitry Andric // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 71140b57cec5SDimitry Andric __kmp_foreign_tp = TRUE; 71150b57cec5SDimitry Andric 71160b57cec5SDimitry Andric 
__kmp_global.g.g_dynamic = FALSE; 71170b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_default; 71180b57cec5SDimitry Andric 7119fe6060f1SDimitry Andric __kmp_init_nesting_mode(); 7120fe6060f1SDimitry Andric 71210b57cec5SDimitry Andric __kmp_env_initialize(NULL); 71220b57cec5SDimitry Andric 7123e8d8bef9SDimitry Andric #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT 7124e8d8bef9SDimitry Andric __kmp_user_level_mwait_init(); 7125e8d8bef9SDimitry Andric #endif 71260b57cec5SDimitry Andric // Print all messages in message catalog for testing purposes. 71270b57cec5SDimitry Andric #ifdef KMP_DEBUG 71280b57cec5SDimitry Andric char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); 71290b57cec5SDimitry Andric if (__kmp_str_match_true(val)) { 71300b57cec5SDimitry Andric kmp_str_buf_t buffer; 71310b57cec5SDimitry Andric __kmp_str_buf_init(&buffer); 71320b57cec5SDimitry Andric __kmp_i18n_dump_catalog(&buffer); 71330b57cec5SDimitry Andric __kmp_printf("%s", buffer.str); 71340b57cec5SDimitry Andric __kmp_str_buf_free(&buffer); 71350b57cec5SDimitry Andric } 71360b57cec5SDimitry Andric __kmp_env_free(&val); 71370b57cec5SDimitry Andric #endif 71380b57cec5SDimitry Andric 71390b57cec5SDimitry Andric __kmp_threads_capacity = 71400b57cec5SDimitry Andric __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); 71410b57cec5SDimitry Andric // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 71420b57cec5SDimitry Andric __kmp_tp_capacity = __kmp_default_tp_capacity( 71430b57cec5SDimitry Andric __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 71440b57cec5SDimitry Andric 71450b57cec5SDimitry Andric // If the library is shut down properly, both pools must be NULL. Just in 71460b57cec5SDimitry Andric // case, set them to NULL -- some memory may leak, but subsequent code will 71470b57cec5SDimitry Andric // work even if pools are not freed. 
71480b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); 71490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); 71500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); 71510b57cec5SDimitry Andric __kmp_thread_pool = NULL; 71520b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = NULL; 71530b57cec5SDimitry Andric __kmp_team_pool = NULL; 71540b57cec5SDimitry Andric 71550b57cec5SDimitry Andric /* Allocate all of the variable sized records */ 71560b57cec5SDimitry Andric /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are 71570b57cec5SDimitry Andric * expandable */ 71580b57cec5SDimitry Andric /* Since allocation is cache-aligned, just add extra padding at the end */ 71590b57cec5SDimitry Andric size = 71600b57cec5SDimitry Andric (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + 71610b57cec5SDimitry Andric CACHE_LINE; 71620b57cec5SDimitry Andric __kmp_threads = (kmp_info_t **)__kmp_allocate(size); 71630b57cec5SDimitry Andric __kmp_root = (kmp_root_t **)((char *)__kmp_threads + 71640b57cec5SDimitry Andric sizeof(kmp_info_t *) * __kmp_threads_capacity); 71650b57cec5SDimitry Andric 71660b57cec5SDimitry Andric /* init thread counts */ 71670b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_all_nth == 71680b57cec5SDimitry Andric 0); // Asserts fail if the library is reinitializing and 71690b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. 
71700b57cec5SDimitry Andric __kmp_all_nth = 0; 71710b57cec5SDimitry Andric __kmp_nth = 0; 71720b57cec5SDimitry Andric 71730b57cec5SDimitry Andric /* setup the uber master thread and hierarchy */ 71740b57cec5SDimitry Andric gtid = __kmp_register_root(TRUE); 71750b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); 71760b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 71770b57cec5SDimitry Andric KMP_ASSERT(KMP_INITIAL_GTID(gtid)); 71780b57cec5SDimitry Andric 71790b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 71800b57cec5SDimitry Andric 71810b57cec5SDimitry Andric __kmp_common_initialize(); 71820b57cec5SDimitry Andric 71830b57cec5SDimitry Andric #if KMP_OS_UNIX 71840b57cec5SDimitry Andric /* invoke the child fork handler */ 71850b57cec5SDimitry Andric __kmp_register_atfork(); 71860b57cec5SDimitry Andric #endif 71870b57cec5SDimitry Andric 71880b57cec5SDimitry Andric #if !KMP_DYNAMIC_LIB 71890b57cec5SDimitry Andric { 71900b57cec5SDimitry Andric /* Invoke the exit handler when the program finishes, only for static 71910b57cec5SDimitry Andric library. For dynamic library, we already have _fini and DllMain. */ 71920b57cec5SDimitry Andric int rc = atexit(__kmp_internal_end_atexit); 71930b57cec5SDimitry Andric if (rc != 0) { 71940b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), 71950b57cec5SDimitry Andric __kmp_msg_null); 71960b57cec5SDimitry Andric } 71970b57cec5SDimitry Andric } 71980b57cec5SDimitry Andric #endif 71990b57cec5SDimitry Andric 72000b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 72010b57cec5SDimitry Andric #if KMP_OS_UNIX 72020b57cec5SDimitry Andric /* NOTE: make sure that this is called before the user installs their own 72030b57cec5SDimitry Andric signal handlers so that the user handlers are called first. 
     this way they
     can return false, not call our handler, avoid terminating the library, and
     continue execution where they left off. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

#if OMPT_SUPPORT
  ompt_post_init();
#endif

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}

// Thread-safe public entry point for serial (library-wide) initialization.
// Uses the bootstrap init lock plus a double-check of __kmp_init_serial so
// that exactly one thread runs __kmp_do_serial_initialize(); the unlocked
// fast-path check avoids lock traffic once initialization is complete.
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// Second-phase ("middle") initialization: runs affinity setup, resolves the
// default team size (__kmp_dflt_team_nth) from cores/available procs and the
// nested-nth list, propagates the new default to already-registered root
// threads, and finally publishes __kmp_init_middle. Caller must hold
// __kmp_initz_lock (see __kmp_middle_initialize wrapper below).
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  // Save the previous value for the __kmp_dflt_team_nth so that
  // we can avoid some reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the
  // number of cores on the machine.
  __kmp_affinity_initialize();

#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  // Clamp the default team size into [KMP_MIN_NTH, __kmp_sys_max_nth].
  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  // There's no harm in continuing if the following check fails,
  // but it indicates an error in the previous logic.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num threads icv for each
    // root thread that is currently registered with the RTL (which has not
    // already explicitly set its nthreads-var with a call to
    // omp_set_num_threads()).
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}

// Thread-safe public wrapper for middle initialization; same
// double-checked-under-bootstrap-lock pattern as __kmp_serial_initialize.
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// Final ("parallel") initialization phase, run lazily before the first real
// parallel region: ensures middle init has happened, captures x86 FP control
// state for worker startup, installs signal handlers, initializes suspension
// support, and selects the default dynamic-adjustment mode. Serialized via
// __kmp_initz_lock with a double-check of __kmp_init_parallel.
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* jc: The lock __kmp_initz_lock is already held, so calling
     __kmp_serial_initialize would cause a deadlock. So we call
     __kmp_do_serial_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  // Worker threads will set theirs to these values at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KMP_MB();
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// One-time initialization of the hidden-helper thread machinery; requires
// full parallel initialization first, then double-checks under the
// bootstrap lock before spawning the helper team.
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause dead lock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Set the global variable indicating that we're initializing hidden helper
  // team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* ------------------------------------------------------------------------ */

// Per-thread setup executed immediately before a thread invokes its implicit
// (parallel-region) task: resets the construct counter and the dispatch
// buffer indices, and pushes the parallel frame when consistency checking
// is enabled.
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
  // this_thr->th.th_info.ds.ds_tid ] );

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates.
             */
}

// Per-thread teardown executed right after a thread finishes its implicit
// task: pops the consistency-check frame and finalizes the implicit task.
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

// Invoked by each team member to run the team's microtask (team->t.t_pkfn).
// Brackets the call with ittnotify stack hints, OMPT implicit-task
// begin bookkeeping, and stats timers; returns the microtask's result.
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    // Expose the implicit task's exit-frame slot so the tool can unwind.
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
                              );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}

void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  // Request the stored per-team thread count for the inner parallel.
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

  // Launch league of teams now, but not let workers execute
  // (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // AC: last parameter "1" eliminates join barrier which won't work because
  // worker threads are in a fork barrier waiting for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

// Entry point run by the primary thread of each team in a teams construct:
// wraps __kmp_teams_master() with before/after implicit-task bookkeeping and
// OMPT initial-task begin callbacks. Always returns 1.
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

/* this sets the requested number of threads for the next parallel region
   encountered by this team. since this should be enclosed in the forkjoin
   critical section it should avoid race conditions with asymmetrical nested
   parallelism */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  // Non-positive requests are ignored; th_set_nproc keeps its prior value.
  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

// Computes and stores (in thr->th.th_teams_size.nth) the per-team thread
// count for a teams construct. When num_threads == 0 (no thread_limit
// clause) it derives a default from KMP_TEAMS_THREAD_LIMIT or
// avail_proc/num_teams and clips it against nthreads-var, thread-limit-var
// and __kmp_teams_max_nth without warning; an explicit request updates
// thread-limit-var and warns if it must be reduced.
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size to exceed thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads
    // Store new thread limit; old limit is
    // saved in th_cg_roots list
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires requested values to be positive,
    // but people can send us any value, so we'd better check
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    // No num_teams clause: fall back to KMP_NUM_TEAMS / OMP_NUM_TEAMS.
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1.
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0); 7793fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb); 7794fe6060f1SDimitry Andric KMP_DEBUG_ASSERT(num_threads >= 0); 7795fe6060f1SDimitry Andric 7796fe6060f1SDimitry Andric if (num_teams_lb > num_teams_ub) { 7797fe6060f1SDimitry Andric __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub), 7798fe6060f1SDimitry Andric KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null); 77990b57cec5SDimitry Andric } 7800fe6060f1SDimitry Andric 7801fe6060f1SDimitry Andric int num_teams = 1; // defalt number of teams is 1. 7802fe6060f1SDimitry Andric 7803fe6060f1SDimitry Andric if (num_teams_lb == 0 && num_teams_ub > 0) 7804fe6060f1SDimitry Andric num_teams_lb = num_teams_ub; 7805fe6060f1SDimitry Andric 7806fe6060f1SDimitry Andric if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause 7807fe6060f1SDimitry Andric num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams; 7808fe6060f1SDimitry Andric if (num_teams > __kmp_teams_max_nth) { 7809fe6060f1SDimitry Andric if (!__kmp_reserve_warn) { 7810fe6060f1SDimitry Andric __kmp_reserve_warn = 1; 78110b57cec5SDimitry Andric __kmp_msg(kmp_ms_warning, 7812fe6060f1SDimitry Andric KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), 78130b57cec5SDimitry Andric KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 78140b57cec5SDimitry Andric } 7815fe6060f1SDimitry Andric num_teams = __kmp_teams_max_nth; 7816fe6060f1SDimitry Andric } 7817fe6060f1SDimitry Andric } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams 7818fe6060f1SDimitry Andric num_teams = num_teams_ub; 7819fe6060f1SDimitry Andric } else { // num_teams_lb <= num_teams <= num_teams_ub 78200eae32dcSDimitry Andric if (num_threads <= 0) { 7821fe6060f1SDimitry Andric if (num_teams_ub > __kmp_teams_max_nth) { 7822fe6060f1SDimitry Andric num_teams = num_teams_lb; 7823fe6060f1SDimitry Andric } else { 7824fe6060f1SDimitry Andric num_teams = num_teams_ub; 
7825fe6060f1SDimitry Andric } 7826fe6060f1SDimitry Andric } else { 7827fe6060f1SDimitry Andric num_teams = (num_threads > __kmp_teams_max_nth) 7828fe6060f1SDimitry Andric ? num_teams 7829fe6060f1SDimitry Andric : __kmp_teams_max_nth / num_threads; 7830fe6060f1SDimitry Andric if (num_teams < num_teams_lb) { 7831fe6060f1SDimitry Andric num_teams = num_teams_lb; 7832fe6060f1SDimitry Andric } else if (num_teams > num_teams_ub) { 7833fe6060f1SDimitry Andric num_teams = num_teams_ub; 78340b57cec5SDimitry Andric } 78350b57cec5SDimitry Andric } 7836fe6060f1SDimitry Andric } 7837fe6060f1SDimitry Andric // Set number of teams (number of threads in the outer "parallel" of the 7838fe6060f1SDimitry Andric // teams) 7839fe6060f1SDimitry Andric thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 7840fe6060f1SDimitry Andric 7841fe6060f1SDimitry Andric __kmp_push_thread_limit(thr, num_teams, num_threads); 78420b57cec5SDimitry Andric } 78430b57cec5SDimitry Andric 78440b57cec5SDimitry Andric // Set the proc_bind var to use in the following parallel region. 78450b57cec5SDimitry Andric void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { 78460b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 78470b57cec5SDimitry Andric thr->th.th_set_proc_bind = proc_bind; 78480b57cec5SDimitry Andric } 78490b57cec5SDimitry Andric 78500b57cec5SDimitry Andric /* Launch the worker threads into the microtask. 
*/

// Called by the primary thread of `team`: (re)initializes per-team construct
// state and the dispatch buffers, then releases the workers through the fork
// barrier so they start executing the microtask.
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid)); // only the primary thread may fork
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    // Serialized-capacity team keeps a single dispatch buffer.
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  // Sanity: every team member must agree on the team size.
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

// Called by the primary thread at the end of a parallel region: waits for the
// team at the join barrier and, if OMPT is enabled, emits the end-of-barrier
// and end-of-implicit-task callbacks on behalf of this thread.
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    // Dump state before the assert below fires, to aid debugging.
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    // Only the primary thread reports the region's return address, and only
    // when a sync-region callback is actually registered.
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
79630b57cec5SDimitry Andric static int __kmp_active_hot_team_nproc(kmp_root_t *root) { 79640b57cec5SDimitry Andric int i; 79650b57cec5SDimitry Andric int retval; 79660b57cec5SDimitry Andric kmp_team_t *hot_team; 79670b57cec5SDimitry Andric 79680b57cec5SDimitry Andric if (root->r.r_active) { 79690b57cec5SDimitry Andric return 0; 79700b57cec5SDimitry Andric } 79710b57cec5SDimitry Andric hot_team = root->r.r_hot_team; 79720b57cec5SDimitry Andric if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { 7973fe6060f1SDimitry Andric return hot_team->t.t_nproc - 1; // Don't count primary thread 79740b57cec5SDimitry Andric } 79750b57cec5SDimitry Andric 7976fe6060f1SDimitry Andric // Skip the primary thread - it is accounted for elsewhere. 79770b57cec5SDimitry Andric retval = 0; 79780b57cec5SDimitry Andric for (i = 1; i < hot_team->t.t_nproc; i++) { 79790b57cec5SDimitry Andric if (hot_team->t.t_threads[i]->th.th_active) { 79800b57cec5SDimitry Andric retval++; 79810b57cec5SDimitry Andric } 79820b57cec5SDimitry Andric } 79830b57cec5SDimitry Andric return retval; 79840b57cec5SDimitry Andric } 79850b57cec5SDimitry Andric 79860b57cec5SDimitry Andric // Perform an automatic adjustment to the number of 79870b57cec5SDimitry Andric // threads used by the next parallel region. 
// Compute the thread count for the next parallel region under the dynamic
// load-balance mode. Returns a value clamped to [KMP_MIN_NTH, set_nproc];
// falls back permanently to the thread-limit algorithm if the system load
// cannot be read.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  // NOTE(review): unreachable when assertions are enabled, since set_nproc > 1
  // is asserted just above; kept as a release-build guard.
  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for this
  // particular root (if we are at the outer par level), and the currently
  // executing thread (to become the primary thread) are available to add to the
  // new team, but are currently contributing to the system load, and must be
  // accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
    // = dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp thread that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
// Tears down runtime state in reverse order of initialization (parallel ->
// middle -> serial), then frees the thread/root tables, locks, nested-nth and
// proc-bind arrays, and other global allocations.
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  // Free per-root structures; slots may be sparse, so NULL-check each.
  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as single block, so
  // there is no need in freeing __kmp_root.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next; // grab next before freeing node
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_state) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}

/* ------------------------------------------------------------------------ */

// Honors the KMP_IGNORE_MPPBEG environment variable: returns TRUE unless it
// is set to an explicit "false" value.
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

// Honors the KMP_IGNORE_MPPEND environment variable: returns TRUE unless it
// is set to an explicit "false" value.
int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
// (continuation of __kmp_ignore_mppend)
  return TRUE;
}

// Registers this (uber) thread's root and marks it as begun. Uses a
// check / lock / re-check sequence on r_begin so concurrent callers only
// perform the transition once.
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    // Another thread completed the transition while we waited for the lock.
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

/* ------------------------------------------------------------------------ */

// User-facing entry (kmp_set_library): validates the call context, resets this
// thread's pending nproc request and default team size per library mode, then
// forwards to __kmp_aux_set_library().
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    // Same team-size handling as library_turnaround; the modes differ only in
    // yield/blocktime policy applied in __kmp_aux_set_library().
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}

// Sets the default stack size for worker threads. Only effective before the
// first parallel region; the value is clamped to
// [__kmp_sys_min_stksize, KMP_MAX_STKSIZE].
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  // Round up to a 4K page boundary on Darwin (unless that would overflow).
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
// Applies the library mode globally: adjusts yield policy for turnaround and
// default blocktime for throughput.
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct
// Otherwise walks up from the current team to the team at the teams-construct
// level, tracking serialization in `teams_serialized` (out-param).
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      // Consume serialized levels of the current team before moving to its
      // parent; each serialized level counts as one nesting level.
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

// omp_get_team_num() support: the team number within the league, or 0 when
// outside a teams construct or when the teams region is serialized.
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

// omp_get_num_teams() support: the league size, or 1 when outside a teams
// construct or when the teams region is serialized.
int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * .
=> right justified when size is specified 83770b57cec5SDimitry Andric * by default output is left justified 83780b57cec5SDimitry Andric * size is the *minimum* field length 83790b57cec5SDimitry Andric * All other characters are printed as is 83800b57cec5SDimitry Andric * 83810b57cec5SDimitry Andric * Available field types: 83820b57cec5SDimitry Andric * L {thread_level} - omp_get_level() 83830b57cec5SDimitry Andric * n {thread_num} - omp_get_thread_num() 83840b57cec5SDimitry Andric * h {host} - name of host machine 83850b57cec5SDimitry Andric * P {process_id} - process id (integer) 83860b57cec5SDimitry Andric * T {thread_identifier} - native thread identifier (integer) 83870b57cec5SDimitry Andric * N {num_threads} - omp_get_num_threads() 83880b57cec5SDimitry Andric * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1) 83890b57cec5SDimitry Andric * a {thread_affinity} - comma separated list of integers or integer ranges 83900b57cec5SDimitry Andric * (values of affinity mask) 83910b57cec5SDimitry Andric * 83920b57cec5SDimitry Andric * Implementation-specific field types can be added 83930b57cec5SDimitry Andric * If a type is unknown, print "undefined" 83940b57cec5SDimitry Andric */ 83950b57cec5SDimitry Andric 83960b57cec5SDimitry Andric // Structure holding the short name, long name, and corresponding data type 83970b57cec5SDimitry Andric // for snprintf. A table of these will represent the entire valid keyword 83980b57cec5SDimitry Andric // field types. 
typedef struct kmp_affinity_format_field_t {
  char short_name; // single-char field type, e.g., 'L' -> nesting level
  const char *long_name; // brace form of the same field, e.g., "nesting_level"
  char field_format; // data type for snprintf (typically 'd' or 's'
  // for integer or string)
} kmp_affinity_format_field_t;

// Table of all recognized affinity-format fields; this is the authoritative
// mapping between short names, long names, and printf conversions.
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};

// Parse one %-field starting at *ptr (which must point at '%'), render its
// value for thread `th` (global tid `gtid`) into field_buffer, and advance
// *ptr past the consumed field.
// Return the number of characters it takes to hold field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  // format[] holds at most: '%' '-' '0' + 8 width digits + conversion + NUL
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first (prints a literal '%')
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths.
    // This also prevents overflowing format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short)
  // Canonicalize the name into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    // A long name must be terminated by '}'; otherwise treat as unknown.
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested
  // value using snprintf within __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to spec, If an implementation does not have info for field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field so the caller's scan can continue past it
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

/*
 * Return number of characters needed to hold the affinity string
 * (not including null byte character)
 * The resultant string is printed to buffer, which the caller can then
 * handle afterwards
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval; // running count of characters produced
  const kmp_info_t *th;
  kmp_str_buf_t field; // scratch buffer for one rendered %-field

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, then we use
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer (advances parse_ptr past the field)
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
// (renders `format` for thread `gtid` and writes it followed by a newline)
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}

/* ------------------------------------------------------------------------ */

// Clamp `arg` (milliseconds) into [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and
// record it as the blocktime for `thread`'s team and serial team, marking
// blocktime as explicitly set.
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

// Apply an environment-style settings string `str` to the runtime.
// NOTE(review): `len` is not used in this body — kept for the external
// interface; callers appear to pass the string length.
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if
(__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

// Choose a reduction implementation (critical section, atomic, tree, or
// empty) for the reduction described by the arguments, based on team size,
// what the compiler generated (reduce_data/reduce_func, atomic flag in
// loc->flags), architecture/OS tuning, and any KMP_FORCE_REDUCTION override.
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in current
  // PAROPT )
  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
  // can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to OpenMP RTL to make a decision on which method to select
  // among generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

// True when the compiler emitted the atomic-reduce flag for this construct
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
// True when the compiler supplied data+function for a tree reduction
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block; // no synchronization needed for one thread

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    // 64-bit tuning: small teams prefer atomics, large teams prefer the
    // tree reduction (cutoff raised on MIC hardware).
    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        // Forced method not generated by the compiler; fall back to critical
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// this function is for testing set/get/determine reduce method
88540b57cec5SDimitry Andric kmp_int32 __kmp_get_reduce_method(void) { 88550b57cec5SDimitry Andric return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 88560b57cec5SDimitry Andric } 88570b57cec5SDimitry Andric 88580b57cec5SDimitry Andric // Soft pause sets up threads to ignore blocktime and just go to sleep. 88590b57cec5SDimitry Andric // Spin-wait code checks __kmp_pause_status and reacts accordingly. 88600b57cec5SDimitry Andric void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } 88610b57cec5SDimitry Andric 88620b57cec5SDimitry Andric // Hard pause shuts down the runtime completely. Resume happens naturally when 88630b57cec5SDimitry Andric // OpenMP is used subsequently. 88640b57cec5SDimitry Andric void __kmp_hard_pause() { 88650b57cec5SDimitry Andric __kmp_pause_status = kmp_hard_paused; 88660b57cec5SDimitry Andric __kmp_internal_end_thread(-1); 88670b57cec5SDimitry Andric } 88680b57cec5SDimitry Andric 88690b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    // Clear the pause flag first so woken threads do not immediately
    // go back to sleep, then wake every worker (gtid 0 is this thread).
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// TODO: add warning messages
// Handle a pause/resume request: `level` is the desired state
// (kmp_not_paused = resume, kmp_soft_paused, kmp_hard_paused).
// Returns 0 on success, 1 if the request is invalid for the current state.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // error message about runtime not being paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // error message about invalid level
    return 1;
  }
}

// Print the OpenMP environment (OMP_DISPLAY_ENV style), initializing the
// runtime first if needed; serialized via the bootstrap init lock.
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so distributed barrier must be modified
// th_used_in_team states observed here: 1 = in use, 3 = transitioning to
// in-use, 2 = transitioning to unused, 0 = unused.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If thread is transitioning still to in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0; but may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
8999349cc55cSDimitry Andric // Add the threads back to the team 9000349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team); 9001349cc55cSDimitry Andric // Threads were paused and pointed at th_used_in_team temporarily during a 9002349cc55cSDimitry Andric // resize of the team. We're going to set th_used_in_team to 3 to indicate to 9003349cc55cSDimitry Andric // the thread that it should transition itself back into the team. Then, if 9004349cc55cSDimitry Andric // blocktime isn't infinite, the thread could be sleeping, so we send a resume 9005349cc55cSDimitry Andric // to wake it up. 9006349cc55cSDimitry Andric for (int f = 1; f < new_nthreads; ++f) { 9007349cc55cSDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 9008349cc55cSDimitry Andric KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0, 9009349cc55cSDimitry Andric 3); 9010349cc55cSDimitry Andric if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads 9011349cc55cSDimitry Andric __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid, 9012349cc55cSDimitry Andric (kmp_flag_32<false, false> *)NULL); 9013349cc55cSDimitry Andric } 9014349cc55cSDimitry Andric } 9015349cc55cSDimitry Andric // The threads should be transitioning to the team; when they are done, they 9016349cc55cSDimitry Andric // should have set th_used_in_team to 1. This loop forces master to wait until 9017349cc55cSDimitry Andric // all threads have moved into the team and are waiting in the barrier. 
9018349cc55cSDimitry Andric int count = new_nthreads - 1; 9019349cc55cSDimitry Andric while (count > 0) { 9020349cc55cSDimitry Andric count = new_nthreads - 1; 9021349cc55cSDimitry Andric for (int f = 1; f < new_nthreads; ++f) { 9022349cc55cSDimitry Andric if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) { 9023349cc55cSDimitry Andric count--; 9024349cc55cSDimitry Andric } 9025349cc55cSDimitry Andric } 9026349cc55cSDimitry Andric } 9027349cc55cSDimitry Andric } 9028349cc55cSDimitry Andric 9029e8d8bef9SDimitry Andric // Globals and functions for hidden helper task 9030e8d8bef9SDimitry Andric kmp_info_t **__kmp_hidden_helper_threads; 9031e8d8bef9SDimitry Andric kmp_info_t *__kmp_hidden_helper_main_thread; 9032e8d8bef9SDimitry Andric std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks; 9033e8d8bef9SDimitry Andric #if KMP_OS_LINUX 9034fe6060f1SDimitry Andric kmp_int32 __kmp_hidden_helper_threads_num = 8; 9035e8d8bef9SDimitry Andric kmp_int32 __kmp_enable_hidden_helper = TRUE; 9036e8d8bef9SDimitry Andric #else 9037fe6060f1SDimitry Andric kmp_int32 __kmp_hidden_helper_threads_num = 0; 9038e8d8bef9SDimitry Andric kmp_int32 __kmp_enable_hidden_helper = FALSE; 9039e8d8bef9SDimitry Andric #endif 9040e8d8bef9SDimitry Andric 9041e8d8bef9SDimitry Andric namespace { 9042e8d8bef9SDimitry Andric std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num; 9043e8d8bef9SDimitry Andric 9044e8d8bef9SDimitry Andric void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) { 9045e8d8bef9SDimitry Andric // This is an explicit synchronization on all hidden helper threads in case 9046e8d8bef9SDimitry Andric // that when a regular thread pushes a hidden helper task to one hidden 9047e8d8bef9SDimitry Andric // helper thread, the thread has not been awaken once since they're released 9048e8d8bef9SDimitry Andric // by the main thread after creating the team. 
9049e8d8bef9SDimitry Andric KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num); 9050e8d8bef9SDimitry Andric while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) != 9051e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_num) 9052e8d8bef9SDimitry Andric ; 9053e8d8bef9SDimitry Andric 9054e8d8bef9SDimitry Andric // If main thread, then wait for signal 9055e8d8bef9SDimitry Andric if (__kmpc_master(nullptr, *gtid)) { 9056e8d8bef9SDimitry Andric // First, unset the initial state and release the initial thread 9057e8d8bef9SDimitry Andric TCW_4(__kmp_init_hidden_helper_threads, FALSE); 9058e8d8bef9SDimitry Andric __kmp_hidden_helper_initz_release(); 9059e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread_wait(); 9060e8d8bef9SDimitry Andric // Now wake up all worker threads 9061e8d8bef9SDimitry Andric for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) { 9062e8d8bef9SDimitry Andric __kmp_hidden_helper_worker_thread_signal(); 9063e8d8bef9SDimitry Andric } 9064e8d8bef9SDimitry Andric } 9065e8d8bef9SDimitry Andric } 9066e8d8bef9SDimitry Andric } // namespace 9067e8d8bef9SDimitry Andric 9068e8d8bef9SDimitry Andric void __kmp_hidden_helper_threads_initz_routine() { 9069e8d8bef9SDimitry Andric // Create a new root for hidden helper team/threads 9070e8d8bef9SDimitry Andric const int gtid = __kmp_register_root(TRUE); 9071e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread = __kmp_threads[gtid]; 9072e8d8bef9SDimitry Andric __kmp_hidden_helper_threads = &__kmp_threads[gtid]; 9073e8d8bef9SDimitry Andric __kmp_hidden_helper_main_thread->th.th_set_nproc = 9074e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_num; 9075e8d8bef9SDimitry Andric 9076e8d8bef9SDimitry Andric KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0); 9077e8d8bef9SDimitry Andric 9078e8d8bef9SDimitry Andric __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn); 9079e8d8bef9SDimitry Andric 9080e8d8bef9SDimitry Andric // Set the initialization flag to FALSE 
9081e8d8bef9SDimitry Andric TCW_SYNC_4(__kmp_init_hidden_helper, FALSE); 9082e8d8bef9SDimitry Andric 9083e8d8bef9SDimitry Andric __kmp_hidden_helper_threads_deinitz_release(); 9084e8d8bef9SDimitry Andric } 9085fe6060f1SDimitry Andric 9086fe6060f1SDimitry Andric /* Nesting Mode: 9087fe6060f1SDimitry Andric Set via KMP_NESTING_MODE, which takes an integer. 9088fe6060f1SDimitry Andric Note: we skip duplicate topology levels, and skip levels with only 9089fe6060f1SDimitry Andric one entity. 9090fe6060f1SDimitry Andric KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode. 9091fe6060f1SDimitry Andric KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels 9092fe6060f1SDimitry Andric in the topology, and initializes the number of threads at each of those 9093fe6060f1SDimitry Andric levels to the number of entities at each level, respectively, below the 9094fe6060f1SDimitry Andric entity at the parent level. 9095fe6060f1SDimitry Andric KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels, 9096fe6060f1SDimitry Andric but starts with nesting OFF -- max-active-levels-var is 1 -- and requires 9097fe6060f1SDimitry Andric the user to turn nesting on explicitly. This is an even more experimental 9098fe6060f1SDimitry Andric option to this experimental feature, and may change or go away in the 9099fe6060f1SDimitry Andric future. 
9100fe6060f1SDimitry Andric */ 9101fe6060f1SDimitry Andric 9102fe6060f1SDimitry Andric // Allocate space to store nesting levels 9103fe6060f1SDimitry Andric void __kmp_init_nesting_mode() { 9104fe6060f1SDimitry Andric int levels = KMP_HW_LAST; 9105fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = levels; 9106fe6060f1SDimitry Andric __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int)); 9107fe6060f1SDimitry Andric for (int i = 0; i < levels; ++i) 9108fe6060f1SDimitry Andric __kmp_nesting_nth_level[i] = 0; 9109fe6060f1SDimitry Andric if (__kmp_nested_nth.size < levels) { 9110fe6060f1SDimitry Andric __kmp_nested_nth.nth = 9111fe6060f1SDimitry Andric (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int)); 9112fe6060f1SDimitry Andric __kmp_nested_nth.size = levels; 9113fe6060f1SDimitry Andric } 9114fe6060f1SDimitry Andric } 9115fe6060f1SDimitry Andric 9116fe6060f1SDimitry Andric // Set # threads for top levels of nesting; must be called after topology set 9117fe6060f1SDimitry Andric void __kmp_set_nesting_mode_threads() { 9118fe6060f1SDimitry Andric kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()]; 9119fe6060f1SDimitry Andric 9120fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) 9121fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 9122fe6060f1SDimitry Andric else if (__kmp_nesting_mode > 1) 9123fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9124fe6060f1SDimitry Andric 9125fe6060f1SDimitry Andric if (__kmp_topology) { // use topology info 9126fe6060f1SDimitry Andric int loc, hw_level; 9127fe6060f1SDimitry Andric for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() && 9128fe6060f1SDimitry Andric loc < __kmp_nesting_mode_nlevels; 9129fe6060f1SDimitry Andric loc++, hw_level++) { 9130fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level); 9131fe6060f1SDimitry Andric if (__kmp_nesting_nth_level[loc] == 1) 
9132fe6060f1SDimitry Andric loc--; 9133fe6060f1SDimitry Andric } 9134fe6060f1SDimitry Andric // Make sure all cores are used 9135fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && loc > 1) { 9136fe6060f1SDimitry Andric int core_level = __kmp_topology->get_level(KMP_HW_CORE); 9137fe6060f1SDimitry Andric int num_cores = __kmp_topology->get_count(core_level); 9138fe6060f1SDimitry Andric int upper_levels = 1; 9139fe6060f1SDimitry Andric for (int level = 0; level < loc - 1; ++level) 9140fe6060f1SDimitry Andric upper_levels *= __kmp_nesting_nth_level[level]; 9141fe6060f1SDimitry Andric if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores) 9142fe6060f1SDimitry Andric __kmp_nesting_nth_level[loc - 1] = 9143fe6060f1SDimitry Andric num_cores / __kmp_nesting_nth_level[loc - 2]; 9144fe6060f1SDimitry Andric } 9145fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = loc; 9146fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9147fe6060f1SDimitry Andric } else { // no topology info available; provide a reasonable guesstimation 9148fe6060f1SDimitry Andric if (__kmp_avail_proc >= 4) { 9149fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2; 9150fe6060f1SDimitry Andric __kmp_nesting_nth_level[1] = 2; 9151fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 2; 9152fe6060f1SDimitry Andric } else { 9153fe6060f1SDimitry Andric __kmp_nesting_nth_level[0] = __kmp_avail_proc; 9154fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = 1; 9155fe6060f1SDimitry Andric } 9156fe6060f1SDimitry Andric __kmp_nested_nth.used = __kmp_nesting_mode_nlevels; 9157fe6060f1SDimitry Andric } 9158fe6060f1SDimitry Andric for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) { 9159fe6060f1SDimitry Andric __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i]; 9160fe6060f1SDimitry Andric } 9161fe6060f1SDimitry Andric set__nproc(thread, __kmp_nesting_nth_level[0]); 9162fe6060f1SDimitry Andric if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > 
__kmp_nesting_mode) 9163fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = __kmp_nesting_mode; 9164fe6060f1SDimitry Andric if (get__max_active_levels(thread) > 1) { 9165fe6060f1SDimitry Andric // if max levels was set, set nesting mode levels to same 9166fe6060f1SDimitry Andric __kmp_nesting_mode_nlevels = get__max_active_levels(thread); 9167fe6060f1SDimitry Andric } 9168fe6060f1SDimitry Andric if (__kmp_nesting_mode == 1) // turn on nesting for this case only 9169fe6060f1SDimitry Andric set__max_active_levels(thread, __kmp_nesting_mode_nlevels); 9170fe6060f1SDimitry Andric } 9171