10b57cec5SDimitry Andric /* 20b57cec5SDimitry Andric * kmp_runtime.cpp -- KPTS runtime support library 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric 50b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 80b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 90b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "kmp.h" 140b57cec5SDimitry Andric #include "kmp_affinity.h" 150b57cec5SDimitry Andric #include "kmp_atomic.h" 160b57cec5SDimitry Andric #include "kmp_environment.h" 170b57cec5SDimitry Andric #include "kmp_error.h" 180b57cec5SDimitry Andric #include "kmp_i18n.h" 190b57cec5SDimitry Andric #include "kmp_io.h" 200b57cec5SDimitry Andric #include "kmp_itt.h" 210b57cec5SDimitry Andric #include "kmp_settings.h" 220b57cec5SDimitry Andric #include "kmp_stats.h" 230b57cec5SDimitry Andric #include "kmp_str.h" 240b57cec5SDimitry Andric #include "kmp_wait_release.h" 250b57cec5SDimitry Andric #include "kmp_wrapper_getpid.h" 260b57cec5SDimitry Andric #include "kmp_dispatch.h" 270b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 280b57cec5SDimitry Andric #include "kmp_dispatch_hier.h" 290b57cec5SDimitry Andric #endif 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #if OMPT_SUPPORT 320b57cec5SDimitry Andric #include "ompt-specific.h" 330b57cec5SDimitry Andric #endif 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric /* these are temporary issues to be dealt with */ 360b57cec5SDimitry Andric #define KMP_USE_PRCTL 0 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric #if KMP_OS_WINDOWS 390b57cec5SDimitry Andric #include <process.h> 
400b57cec5SDimitry Andric #endif 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric #include "tsan_annotations.h" 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric #if defined(KMP_GOMP_COMPAT) 450b57cec5SDimitry Andric char const __kmp_version_alt_comp[] = 460b57cec5SDimitry Andric KMP_VERSION_PREFIX "alternative compiler support: yes"; 470b57cec5SDimitry Andric #endif /* defined(KMP_GOMP_COMPAT) */ 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric char const __kmp_version_omp_api[] = 500b57cec5SDimitry Andric KMP_VERSION_PREFIX "API version: 5.0 (201611)"; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric #ifdef KMP_DEBUG 530b57cec5SDimitry Andric char const __kmp_version_lock[] = 540b57cec5SDimitry Andric KMP_VERSION_PREFIX "lock type: run time selectable"; 550b57cec5SDimitry Andric #endif /* KMP_DEBUG */ 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric #if KMP_USE_MONITOR 620b57cec5SDimitry Andric kmp_info_t __kmp_monitor; 630b57cec5SDimitry Andric #endif 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric /* Forward declarations */ 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric void __kmp_cleanup(void); 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, 700b57cec5SDimitry Andric int gtid); 710b57cec5SDimitry Andric static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 720b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 730b57cec5SDimitry Andric ident_t *loc); 740b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 750b57cec5SDimitry Andric static void __kmp_partition_places(kmp_team_t *team, 760b57cec5SDimitry Andric int update_master_only = 0); 770b57cec5SDimitry Andric #endif 780b57cec5SDimitry Andric static void 
__kmp_do_serial_initialize(void); 790b57cec5SDimitry Andric void __kmp_fork_barrier(int gtid, int tid); 800b57cec5SDimitry Andric void __kmp_join_barrier(int gtid); 810b57cec5SDimitry Andric void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, 820b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, ident_t *loc); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric #ifdef USE_LOAD_BALANCE 850b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); 860b57cec5SDimitry Andric #endif 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric static int __kmp_expand_threads(int nNeed); 890b57cec5SDimitry Andric #if KMP_OS_WINDOWS 900b57cec5SDimitry Andric static int __kmp_unregister_root_other_thread(int gtid); 910b57cec5SDimitry Andric #endif 920b57cec5SDimitry Andric static void __kmp_unregister_library(void); // called by __kmp_internal_end() 930b57cec5SDimitry Andric static void __kmp_reap_thread(kmp_info_t *thread, int is_root); 940b57cec5SDimitry Andric kmp_info_t *__kmp_thread_pool_insert_pt = NULL; 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric /* Calculate the identifier of the current thread */ 970b57cec5SDimitry Andric /* fast (and somewhat portable) way to get unique identifier of executing 980b57cec5SDimitry Andric thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. 
*/ 990b57cec5SDimitry Andric int __kmp_get_global_thread_id() { 1000b57cec5SDimitry Andric int i; 1010b57cec5SDimitry Andric kmp_info_t **other_threads; 1020b57cec5SDimitry Andric size_t stack_data; 1030b57cec5SDimitry Andric char *stack_addr; 1040b57cec5SDimitry Andric size_t stack_size; 1050b57cec5SDimitry Andric char *stack_base; 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric KA_TRACE( 1080b57cec5SDimitry Andric 1000, 1090b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", 1100b57cec5SDimitry Andric __kmp_nth, __kmp_all_nth)); 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to 1130b57cec5SDimitry Andric a parallel region, made it return KMP_GTID_DNE to force serial_initialize 1140b57cec5SDimitry Andric by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee 1150b57cec5SDimitry Andric __kmp_init_gtid for this to work. */ 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric if (!TCR_4(__kmp_init_gtid)) 1180b57cec5SDimitry Andric return KMP_GTID_DNE; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 1210b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 1220b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); 1230b57cec5SDimitry Andric return __kmp_gtid; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric #endif 1260b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 1270b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); 1280b57cec5SDimitry Andric return __kmp_gtid_get_specific(); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric stack_addr = (char *)&stack_data; 1330b57cec5SDimitry Andric other_threads = __kmp_threads; 1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric /* ATT: The code below is a source of potential bugs due to unsynchronized 1360b57cec5SDimitry Andric access to __kmp_threads array. For example: 1370b57cec5SDimitry Andric 1. Current thread loads other_threads[i] to thr and checks it, it is 1380b57cec5SDimitry Andric non-NULL. 1390b57cec5SDimitry Andric 2. Current thread is suspended by OS. 1400b57cec5SDimitry Andric 3. Another thread unregisters and finishes (debug versions of free() 1410b57cec5SDimitry Andric may fill memory with something like 0xEF). 1420b57cec5SDimitry Andric 4. Current thread is resumed. 1430b57cec5SDimitry Andric 5. Current thread reads junk from *thr. 1440b57cec5SDimitry Andric TODO: Fix it. --ln */ 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); 1490b57cec5SDimitry Andric if (!thr) 1500b57cec5SDimitry Andric continue; 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); 1530b57cec5SDimitry Andric stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric /* stack grows down -- search through all of the active threads */ 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric if (stack_addr <= stack_base) { 1580b57cec5SDimitry Andric size_t stack_diff = stack_base - stack_addr; 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric if (stack_diff <= stack_size) { 1610b57cec5SDimitry Andric /* The only way we can be closer than the allocated */ 1620b57cec5SDimitry Andric /* stack size is if we are running on this thread. 
*/ 1630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); 1640b57cec5SDimitry Andric return i; 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric /* get specific to try and determine our gtid */ 1700b57cec5SDimitry Andric KA_TRACE(1000, 1710b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id: internal alg. failed to find " 1720b57cec5SDimitry Andric "thread, using TLS\n")); 1730b57cec5SDimitry Andric i = __kmp_gtid_get_specific(); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric /* if we havn't been assigned a gtid, then return code */ 1780b57cec5SDimitry Andric if (i < 0) 1790b57cec5SDimitry Andric return i; 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric /* dynamically updated stack window for uber threads to avoid get_specific 1820b57cec5SDimitry Andric call */ 1830b57cec5SDimitry Andric if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { 1840b57cec5SDimitry Andric KMP_FATAL(StackOverflow, i); 1850b57cec5SDimitry Andric } 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 1880b57cec5SDimitry Andric if (stack_addr > stack_base) { 1890b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 1900b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 1910b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - 1920b57cec5SDimitry Andric stack_base); 1930b57cec5SDimitry Andric } else { 1940b57cec5SDimitry Andric TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 1950b57cec5SDimitry Andric stack_base - stack_addr); 1960b57cec5SDimitry Andric } 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric /* Reprint stack bounds for ubermaster since they have been refined */ 
1990b57cec5SDimitry Andric if (__kmp_storage_map) { 2000b57cec5SDimitry Andric char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; 2010b57cec5SDimitry Andric char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 2020b57cec5SDimitry Andric __kmp_print_storage_map_gtid(i, stack_beg, stack_end, 2030b57cec5SDimitry Andric other_threads[i]->th.th_info.ds.ds_stacksize, 2040b57cec5SDimitry Andric "th_%d stack (refinement)", i); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric return i; 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric int __kmp_get_global_thread_id_reg() { 2100b57cec5SDimitry Andric int gtid; 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric if (!__kmp_init_serial) { 2130b57cec5SDimitry Andric gtid = KMP_GTID_DNE; 2140b57cec5SDimitry Andric } else 2150b57cec5SDimitry Andric #ifdef KMP_TDATA_GTID 2160b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 3) { 2170b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); 2180b57cec5SDimitry Andric gtid = __kmp_gtid; 2190b57cec5SDimitry Andric } else 2200b57cec5SDimitry Andric #endif 2210b57cec5SDimitry Andric if (TCR_4(__kmp_gtid_mode) >= 2) { 2220b57cec5SDimitry Andric KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); 2230b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2240b57cec5SDimitry Andric } else { 2250b57cec5SDimitry Andric KA_TRACE(1000, 2260b57cec5SDimitry Andric ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); 2270b57cec5SDimitry Andric gtid = __kmp_get_global_thread_id(); 2280b57cec5SDimitry Andric } 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric /* we must be a new uber master sibling thread */ 2310b57cec5SDimitry Andric if (gtid == KMP_GTID_DNE) { 2320b57cec5SDimitry Andric KA_TRACE(10, 2330b57cec5SDimitry Andric ("__kmp_get_global_thread_id_reg: Encountered new root thread. 
" 2340b57cec5SDimitry Andric "Registering a new gtid.\n")); 2350b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 2360b57cec5SDimitry Andric if (!__kmp_init_serial) { 2370b57cec5SDimitry Andric __kmp_do_serial_initialize(); 2380b57cec5SDimitry Andric gtid = __kmp_gtid_get_specific(); 2390b57cec5SDimitry Andric } else { 2400b57cec5SDimitry Andric gtid = __kmp_register_root(FALSE); 2410b57cec5SDimitry Andric } 2420b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 2430b57cec5SDimitry Andric /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric return gtid; 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric /* caller must hold forkjoin_lock */ 2520b57cec5SDimitry Andric void __kmp_check_stack_overlap(kmp_info_t *th) { 2530b57cec5SDimitry Andric int f; 2540b57cec5SDimitry Andric char *stack_beg = NULL; 2550b57cec5SDimitry Andric char *stack_end = NULL; 2560b57cec5SDimitry Andric int gtid; 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); 2590b57cec5SDimitry Andric if (__kmp_storage_map) { 2600b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2610b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric if (gtid == KMP_GTID_MONITOR) { 2660b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2670b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2680b57cec5SDimitry Andric "th_%s stack (%s)", "mon", 2690b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); 2700b57cec5SDimitry Andric } else { 2710b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 2720b57cec5SDimitry Andric gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 2730b57cec5SDimitry Andric "th_%d stack (%s)", gtid, 2740b57cec5SDimitry Andric (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric /* No point in checking ubermaster threads since they use refinement and 2790b57cec5SDimitry Andric * cannot overlap */ 2800b57cec5SDimitry Andric gtid = __kmp_gtid_from_thread(th); 2810b57cec5SDimitry Andric if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 2820b57cec5SDimitry Andric KA_TRACE(10, 2830b57cec5SDimitry Andric ("__kmp_check_stack_overlap: performing extensive checking\n")); 2840b57cec5SDimitry Andric if (stack_beg == NULL) { 2850b57cec5SDimitry Andric stack_end = (char *)th->th.th_info.ds.ds_stackbase; 2860b57cec5SDimitry Andric stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 2900b57cec5SDimitry Andric kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 2910b57cec5SDimitry Andric 2920b57cec5SDimitry Andric if (f_th && f_th != th) { 2930b57cec5SDimitry Andric char *other_stack_end = 2940b57cec5SDimitry Andric (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 2950b57cec5SDimitry Andric char *other_stack_beg = 2960b57cec5SDimitry Andric other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 2970b57cec5SDimitry Andric if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 2980b57cec5SDimitry Andric (stack_end > other_stack_beg && stack_end < other_stack_end)) { 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric /* Print the other stack values before the abort */ 3010b57cec5SDimitry Andric if (__kmp_storage_map) 
3020b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 3030b57cec5SDimitry Andric -1, other_stack_beg, other_stack_end, 3040b57cec5SDimitry Andric (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 3050b57cec5SDimitry Andric "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit), 3080b57cec5SDimitry Andric __kmp_msg_null); 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric } 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 3140b57cec5SDimitry Andric } 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric void __kmp_infinite_loop(void) { 3190b57cec5SDimitry Andric static int done = FALSE; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric while (!done) { 3220b57cec5SDimitry Andric KMP_YIELD(TRUE); 3230b57cec5SDimitry Andric } 3240b57cec5SDimitry Andric } 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric #define MAX_MESSAGE 512 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 3290b57cec5SDimitry Andric char const *format, ...) 
{ 3300b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3310b57cec5SDimitry Andric va_list ap; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric va_start(ap, format); 3340b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 3350b57cec5SDimitry Andric p2, (unsigned long)size, format); 3360b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 3370b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 3380b57cec5SDimitry Andric #if KMP_PRINT_DATA_PLACEMENT 3390b57cec5SDimitry Andric int node; 3400b57cec5SDimitry Andric if (gtid >= 0) { 3410b57cec5SDimitry Andric if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 3420b57cec5SDimitry Andric if (__kmp_storage_map_verbose) { 3430b57cec5SDimitry Andric node = __kmp_get_host_node(p1); 3440b57cec5SDimitry Andric if (node < 0) /* doesn't work, so don't try this next time */ 3450b57cec5SDimitry Andric __kmp_storage_map_verbose = FALSE; 3460b57cec5SDimitry Andric else { 3470b57cec5SDimitry Andric char *last; 3480b57cec5SDimitry Andric int lastNode; 3490b57cec5SDimitry Andric int localProc = __kmp_get_cpu_from_gtid(gtid); 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric const int page_size = KMP_GET_PAGE_SIZE(); 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 3540b57cec5SDimitry Andric p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 3550b57cec5SDimitry Andric if (localProc >= 0) 3560b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 3570b57cec5SDimitry Andric localProc >> 1); 3580b57cec5SDimitry Andric else 3590b57cec5SDimitry Andric __kmp_printf_no_lock(" GTID %d\n", gtid); 3600b57cec5SDimitry Andric #if KMP_USE_PRCTL 3610b57cec5SDimitry Andric /* The more elaborate format is disabled for now because of the prctl 3620b57cec5SDimitry Andric * hanging bug. 
*/ 3630b57cec5SDimitry Andric do { 3640b57cec5SDimitry Andric last = p1; 3650b57cec5SDimitry Andric lastNode = node; 3660b57cec5SDimitry Andric /* This loop collates adjacent pages with the same host node. */ 3670b57cec5SDimitry Andric do { 3680b57cec5SDimitry Andric (char *)p1 += page_size; 3690b57cec5SDimitry Andric } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 3700b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 3710b57cec5SDimitry Andric lastNode); 3720b57cec5SDimitry Andric } while (p1 <= p2); 3730b57cec5SDimitry Andric #else 3740b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 3750b57cec5SDimitry Andric (char *)p1 + (page_size - 1), 3760b57cec5SDimitry Andric __kmp_get_host_node(p1)); 3770b57cec5SDimitry Andric if (p1 < p2) { 3780b57cec5SDimitry Andric __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 3790b57cec5SDimitry Andric (char *)p2 + (page_size - 1), 3800b57cec5SDimitry Andric __kmp_get_host_node(p2)); 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric #endif 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric } else 3860b57cec5SDimitry Andric __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric #endif /* KMP_PRINT_DATA_PLACEMENT */ 3890b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric void __kmp_warn(char const *format, ...) 
{ 3930b57cec5SDimitry Andric char buffer[MAX_MESSAGE]; 3940b57cec5SDimitry Andric va_list ap; 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric if (__kmp_generate_warnings == kmp_warnings_off) { 3970b57cec5SDimitry Andric return; 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric va_start(ap, format); 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 4030b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 4040b57cec5SDimitry Andric __kmp_vprintf(kmp_err, buffer, ap); 4050b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric va_end(ap); 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric void __kmp_abort_process() { 4110b57cec5SDimitry Andric // Later threads may stall here, but that's ok because abort() will kill them. 4120b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric if (__kmp_debug_buf) { 4150b57cec5SDimitry Andric __kmp_dump_debug_buffer(); 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric if (KMP_OS_WINDOWS) { 4190b57cec5SDimitry Andric // Let other threads know of abnormal termination and prevent deadlock 4200b57cec5SDimitry Andric // if abort happened during library initialization or shutdown 4210b57cec5SDimitry Andric __kmp_global.g.g_abort = SIGABRT; 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric /* On Windows* OS by default abort() causes pop-up error box, which stalls 4240b57cec5SDimitry Andric nightly testing. Unfortunately, we cannot reliably suppress pop-up error 4250b57cec5SDimitry Andric boxes. _set_abort_behavior() works well, but this function is not 4260b57cec5SDimitry Andric available in VS7 (this is not problem for DLL, but it is a problem for 4270b57cec5SDimitry Andric static OpenMP RTL). 
SetErrorMode (and so, timelimit utility) does not 4280b57cec5SDimitry Andric help, at least in some versions of MS C RTL. 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric It seems following sequence is the only way to simulate abort() and 4310b57cec5SDimitry Andric avoid pop-up error box. */ 4320b57cec5SDimitry Andric raise(SIGABRT); 4330b57cec5SDimitry Andric _exit(3); // Just in case, if signal ignored, exit anyway. 4340b57cec5SDimitry Andric } else { 4350b57cec5SDimitry Andric abort(); 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric __kmp_infinite_loop(); 4390b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_exit_lock); 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric } // __kmp_abort_process 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric void __kmp_abort_thread(void) { 4440b57cec5SDimitry Andric // TODO: Eliminate g_abort global variable and this function. 4450b57cec5SDimitry Andric // In case of abort just call abort(), it will kill all the threads. 4460b57cec5SDimitry Andric __kmp_infinite_loop(); 4470b57cec5SDimitry Andric } // __kmp_abort_thread 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric /* Print out the storage map for the major kmp_info_t thread data structures 4500b57cec5SDimitry Andric that are allocated together. 
*/ 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 4530b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 4540b57cec5SDimitry Andric gtid); 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 4570b57cec5SDimitry Andric sizeof(kmp_desc_t), "th_%d.th_info", gtid); 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 4600b57cec5SDimitry Andric sizeof(kmp_local_t), "th_%d.th_local", gtid); 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 4630b57cec5SDimitry Andric gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 4640b57cec5SDimitry Andric sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 4670b57cec5SDimitry Andric &thr->th.th_bar[bs_plain_barrier + 1], 4680b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 4690b57cec5SDimitry Andric gtid); 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 4720b57cec5SDimitry Andric &thr->th.th_bar[bs_forkjoin_barrier + 1], 4730b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 4740b57cec5SDimitry Andric gtid); 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 4770b57cec5SDimitry Andric __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 4780b57cec5SDimitry Andric &thr->th.th_bar[bs_reduction_barrier + 1], 4790b57cec5SDimitry Andric sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 4800b57cec5SDimitry Andric gtid); 4810b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 4820b57cec5SDimitry Andric } 
4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric /* Print out the storage map for the major kmp_team_t team data structures 4850b57cec5SDimitry Andric that are allocated together. */ 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, 4880b57cec5SDimitry Andric int team_id, int num_thr) { 4890b57cec5SDimitry Andric int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 4900b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 4910b57cec5SDimitry Andric header, team_id); 4920b57cec5SDimitry Andric 4930b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], 4940b57cec5SDimitry Andric &team->t.t_bar[bs_last_barrier], 4950b57cec5SDimitry Andric sizeof(kmp_balign_team_t) * bs_last_barrier, 4960b57cec5SDimitry Andric "%s_%d.t_bar", header, team_id); 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], 4990b57cec5SDimitry Andric &team->t.t_bar[bs_plain_barrier + 1], 5000b57cec5SDimitry Andric sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", 5010b57cec5SDimitry Andric header, team_id); 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], 5040b57cec5SDimitry Andric &team->t.t_bar[bs_forkjoin_barrier + 1], 5050b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5060b57cec5SDimitry Andric "%s_%d.t_bar[forkjoin]", header, team_id); 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 5090b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], 5100b57cec5SDimitry Andric &team->t.t_bar[bs_reduction_barrier + 1], 5110b57cec5SDimitry Andric sizeof(kmp_balign_team_t), 5120b57cec5SDimitry Andric "%s_%d.t_bar[reduction]", header, team_id); 5130b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 
5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5160b57cec5SDimitry Andric -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], 5170b57cec5SDimitry Andric sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 5200b57cec5SDimitry Andric -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], 5210b57cec5SDimitry Andric sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], 5240b57cec5SDimitry Andric &team->t.t_disp_buffer[num_disp_buff], 5250b57cec5SDimitry Andric sizeof(dispatch_shared_info_t) * num_disp_buff, 5260b57cec5SDimitry Andric "%s_%d.t_disp_buffer", header, team_id); 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric static void __kmp_init_allocator() { __kmp_init_memkind(); } 5300b57cec5SDimitry Andric static void __kmp_fini_allocator() { __kmp_fini_memkind(); } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric #if KMP_DYNAMIC_LIB 5350b57cec5SDimitry Andric #if KMP_OS_WINDOWS 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) { 5380b57cec5SDimitry Andric // TODO: Change to __kmp_break_bootstrap_lock(). 
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

// Wait until this thread is the only live OpenMP thread left, then force the
// global bootstrap locks back to the released state. Called only from
// DllMain(PROCESS_DETACH); see the long comment there for why this hack is
// needed. gtid_req is the gtid of the calling (surviving) thread, which is
// excluded from the liveness scan.
static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one
  // calling ProcessExit or FreeLibrary). So, it might be safe to access the
  // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some
  // threads can be still alive here, although being about to be terminated. The
  // threads in the array with ds_thread==0 are most suspicious. Actually, it
  // can be not safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  // NOTE(review): this is a raw busy-wait with no pause/yield; it relies on
  // the OS terminating the remaining threads promptly during process exit.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue; // thread array not allocated (or already gone)
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue; // empty slot
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue; // skip the calling thread itself
      if (gtid < 0)
        continue; // not a registered OpenMP thread
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

// Windows DLL entry point: ties runtime shutdown to loader lifecycle events.
// Note this runs under the OS loader lock, which constrains what may be done
// here (one reason PROCESS_ATTACH does no real initialization).
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      // lpReserved == NULL when FreeLibrary() was called,
      // lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      // - the main function was compiled by an alternative compiler;
      // - the main function was compiled by icl but without /Qopenmp
      //   (application with plugins);
      // - application terminates by calling C exit(), Fortran CALL EXIT() or
      //   Fortran STOP.
      // - alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn.
*/ 6560b57cec5SDimitry Andric void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 6570b57cec5SDimitry Andric int gtid = *gtid_ref; 6580b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6590b57cec5SDimitry Andric kmp_team_t *team = __kmp_team_from_gtid(gtid); 6600b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 6630b57cec5SDimitry Andric if (__kmp_threads[gtid]->th.th_root->r.r_active) 6640b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 6650b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); 6660b57cec5SDimitry Andric #else 6670b57cec5SDimitry Andric __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); 6680b57cec5SDimitry Andric #endif 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric #ifdef BUILD_PARALLEL_ORDERED 6710b57cec5SDimitry Andric if (!team->t.t_serialized) { 6720b57cec5SDimitry Andric KMP_MB(); 6730b57cec5SDimitry Andric KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ, 6740b57cec5SDimitry Andric NULL); 6750b57cec5SDimitry Andric KMP_MB(); 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric #endif /* BUILD_PARALLEL_ORDERED */ 6780b57cec5SDimitry Andric } 6790b57cec5SDimitry Andric 6800b57cec5SDimitry Andric /* __kmp_parallel_dxo -- Signal the next task. 
 */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  // Consistency checker: pop the ordered-construct entry pushed by
  // __kmp_parallel_deo (only tracked when the root is active).
  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  // Pass the ordered "ticket" to the next thread in tid order; the waiter in
  // __kmp_parallel_deo spins on t_ordered.dt.t_value reaching its own tid.
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates.
     */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

// Returns nonzero if the calling thread wins the SINGLE construct identified
// by id_ref (i.e. it should execute the single block), zero otherwise.
// push_ws controls whether a winner registers a workshare entry with the
// consistency checker.
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  // Serialized team: no race possible, the lone thread always wins.
  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    // Per-thread construct counter; the winner is whoever advances the
    // team-wide counter first via the CAS below.
    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release?
     */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  // Winner pushes a workshare (when requested); everyone else just has the
  // construct checked for consistency.
  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

// Marks the end of a SINGLE region for the winning thread; undoes the
// bookkeeping done in __kmp_enter_single.
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}

/* determine if we can go parallel or must use a
   serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  // NOTE(review): enter_teams is not referenced anywhere in this body —
  // confirm against callers whether it is vestigial here.
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  // The recurring term (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc)
  // below is the number of threads already accounted for on this root: just
  // the master when the root is active, otherwise the parked hot team.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // dyn-var false: keep the requested count; only hard limits below apply
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    // Grant only as many threads as processors not already in use.
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      // Never grant more than was requested.
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    // Testing mode: pick a random team size in [1, set_nthreads].
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0); // unknown dynamic mode
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  // (per contention group: cg_nthreads threads already exist in this group).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough; scale the grant down to
      // what actually fits (but never below 1).
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team.
   We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    // Inside a teams construct the nesting level recorded on the team lags
    // behind by one or two; compensate so the hot-team slot is correct.
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  // A hot team already has its workers installed and initialized; only a
  // fresh (non-hot) team needs the per-thread setup below.
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      // Workers inherit the master's teams-construct context.
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  // Decide whether affinity info must be (re)displayed for this team: any
  // thread whose cached size/level no longer matches triggers a redisplay.
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
10690b57cec5SDimitry Andric inline static void propagateFPControl(kmp_team_t *team) { 10700b57cec5SDimitry Andric if (__kmp_inherit_fp_control) { 10710b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 10720b57cec5SDimitry Andric kmp_uint32 mxcsr; 10730b57cec5SDimitry Andric 10740b57cec5SDimitry Andric // Get master values of FPU control flags (both X87 and vector) 10750b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 10760b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 10770b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric // There is no point looking at t_fp_control_saved here. 10800b57cec5SDimitry Andric // If it is TRUE, we still have to update the values if they are different 10810b57cec5SDimitry Andric // from those we now have. If it is FALSE we didn't save anything yet, but 10820b57cec5SDimitry Andric // our objective is the same. We have to ensure that the values in the team 10830b57cec5SDimitry Andric // are the same as those we have. 10840b57cec5SDimitry Andric // So, this code achieves what we need whether or not t_fp_control_saved is 10850b57cec5SDimitry Andric // true. By checking whether the value needs updating we avoid unnecessary 10860b57cec5SDimitry Andric // writes that would put the cache-line into a written state, causing all 10870b57cec5SDimitry Andric // threads in the team to have to read it again. 10880b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); 10890b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); 10900b57cec5SDimitry Andric // Although we don't use this value, other code in the runtime wants to know 10910b57cec5SDimitry Andric // whether it should restore them. So we must ensure it is correct. 10920b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); 10930b57cec5SDimitry Andric } else { 10940b57cec5SDimitry Andric // Similarly here. 
Don't write to this cache-line in the team structure 10950b57cec5SDimitry Andric // unless we have to. 10960b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); 10970b57cec5SDimitry Andric } 10980b57cec5SDimitry Andric } 10990b57cec5SDimitry Andric 11000b57cec5SDimitry Andric // Do the opposite, setting the hardware registers to the updated values from 11010b57cec5SDimitry Andric // the team. 11020b57cec5SDimitry Andric inline static void updateHWFPControl(kmp_team_t *team) { 11030b57cec5SDimitry Andric if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) { 11040b57cec5SDimitry Andric // Only reset the fp control regs if they have been changed in the team. 11050b57cec5SDimitry Andric // the parallel region that we are exiting. 11060b57cec5SDimitry Andric kmp_int16 x87_fpu_control_word; 11070b57cec5SDimitry Andric kmp_uint32 mxcsr; 11080b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 11090b57cec5SDimitry Andric __kmp_store_mxcsr(&mxcsr); 11100b57cec5SDimitry Andric mxcsr &= KMP_X86_MXCSR_MASK; 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) { 11130b57cec5SDimitry Andric __kmp_clear_x87_fpu_status_word(); 11140b57cec5SDimitry Andric __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word); 11150b57cec5SDimitry Andric } 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric if (team->t.t_mxcsr != mxcsr) { 11180b57cec5SDimitry Andric __kmp_load_mxcsr(&team->t.t_mxcsr); 11190b57cec5SDimitry Andric } 11200b57cec5SDimitry Andric } 11210b57cec5SDimitry Andric } 11220b57cec5SDimitry Andric #else 11230b57cec5SDimitry Andric #define propagateFPControl(x) ((void)0) 11240b57cec5SDimitry Andric #define updateHWFPControl(x) ((void)0) 11250b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, 
11280b57cec5SDimitry Andric int realloc); // forward declaration 11290b57cec5SDimitry Andric 11300b57cec5SDimitry Andric /* Run a parallel region that has been serialized, so runs only in a team of the 11310b57cec5SDimitry Andric single master thread. */ 11320b57cec5SDimitry Andric void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 11330b57cec5SDimitry Andric kmp_info_t *this_thr; 11340b57cec5SDimitry Andric kmp_team_t *serial_team; 11350b57cec5SDimitry Andric 11360b57cec5SDimitry Andric KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid)); 11370b57cec5SDimitry Andric 11380b57cec5SDimitry Andric /* Skip all this code for autopar serialized loops since it results in 11390b57cec5SDimitry Andric unacceptable overhead */ 11400b57cec5SDimitry Andric if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 11410b57cec5SDimitry Andric return; 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 11440b57cec5SDimitry Andric __kmp_parallel_initialize(); 11450b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 11460b57cec5SDimitry Andric 11470b57cec5SDimitry Andric this_thr = __kmp_threads[global_tid]; 11480b57cec5SDimitry Andric serial_team = this_thr->th.th_serial_team; 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric /* utilize the serialized team held by this thread */ 11510b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team); 11520b57cec5SDimitry Andric KMP_MB(); 11530b57cec5SDimitry Andric 11540b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 11550b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 11560b57cec5SDimitry Andric this_thr->th.th_task_team == 11570b57cec5SDimitry Andric this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); 11580b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == 11590b57cec5SDimitry Andric NULL); 11600b57cec5SDimitry Andric KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / 
" 11610b57cec5SDimitry Andric "team %p, new task_team = NULL\n", 11620b57cec5SDimitry Andric global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 11630b57cec5SDimitry Andric this_thr->th.th_task_team = NULL; 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; 11670b57cec5SDimitry Andric if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 11680b57cec5SDimitry Andric proc_bind = proc_bind_false; 11690b57cec5SDimitry Andric } else if (proc_bind == proc_bind_default) { 11700b57cec5SDimitry Andric // No proc_bind clause was specified, so use the current value 11710b57cec5SDimitry Andric // of proc-bind-var for this parallel region. 11720b57cec5SDimitry Andric proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; 11730b57cec5SDimitry Andric } 11740b57cec5SDimitry Andric // Reset for next parallel region 11750b57cec5SDimitry Andric this_thr->th.th_set_proc_bind = proc_bind_default; 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric #if OMPT_SUPPORT 11780b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 11790b57cec5SDimitry Andric ompt_data_t *implicit_task_data; 11800b57cec5SDimitry Andric void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 11810b57cec5SDimitry Andric if (ompt_enabled.enabled && 11820b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric ompt_task_info_t *parent_task_info; 11850b57cec5SDimitry Andric parent_task_info = OMPT_CUR_TASK_INFO(this_thr); 11860b57cec5SDimitry Andric 11870b57cec5SDimitry Andric parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 11880b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 11890b57cec5SDimitry Andric int team_size = 1; 11900b57cec5SDimitry Andric 11910b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 
11920b57cec5SDimitry Andric &(parent_task_info->task_data), &(parent_task_info->frame), 1193489b1cf2SDimitry Andric &ompt_parallel_data, team_size, 1194489b1cf2SDimitry Andric ompt_parallel_invoker_program | ompt_parallel_team, codeptr); 11950b57cec5SDimitry Andric } 11960b57cec5SDimitry Andric } 11970b57cec5SDimitry Andric #endif // OMPT_SUPPORT 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric if (this_thr->th.th_team != serial_team) { 12000b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 12010b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 12020b57cec5SDimitry Andric 12030b57cec5SDimitry Andric if (serial_team->t.t_serialized) { 12040b57cec5SDimitry Andric /* this serial team was already used 12050b57cec5SDimitry Andric TODO increase performance by making this locks more specific */ 12060b57cec5SDimitry Andric kmp_team_t *new_team; 12070b57cec5SDimitry Andric 12080b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric new_team = 12110b57cec5SDimitry Andric __kmp_allocate_team(this_thr->th.th_root, 1, 1, 12120b57cec5SDimitry Andric #if OMPT_SUPPORT 12130b57cec5SDimitry Andric ompt_parallel_data, 12140b57cec5SDimitry Andric #endif 12150b57cec5SDimitry Andric proc_bind, &this_thr->th.th_current_task->td_icvs, 12160b57cec5SDimitry Andric 0 USE_NESTED_HOT_ARG(NULL)); 12170b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 12180b57cec5SDimitry Andric KMP_ASSERT(new_team); 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric /* setup new serialized team and install it */ 12210b57cec5SDimitry Andric new_team->t.t_threads[0] = this_thr; 12220b57cec5SDimitry Andric new_team->t.t_parent = this_thr->th.th_team; 12230b57cec5SDimitry Andric serial_team = new_team; 12240b57cec5SDimitry Andric this_thr->th.th_serial_team = serial_team; 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric KF_TRACE( 
12270b57cec5SDimitry Andric 10, 12280b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", 12290b57cec5SDimitry Andric global_tid, serial_team)); 12300b57cec5SDimitry Andric 12310b57cec5SDimitry Andric /* TODO the above breaks the requirement that if we run out of resources, 12320b57cec5SDimitry Andric then we can still guarantee that serialized teams are ok, since we may 12330b57cec5SDimitry Andric need to allocate a new one */ 12340b57cec5SDimitry Andric } else { 12350b57cec5SDimitry Andric KF_TRACE( 12360b57cec5SDimitry Andric 10, 12370b57cec5SDimitry Andric ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", 12380b57cec5SDimitry Andric global_tid, serial_team)); 12390b57cec5SDimitry Andric } 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric /* we have to initialize this serial team */ 12420b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 12430b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 12440b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team); 12450b57cec5SDimitry Andric serial_team->t.t_ident = loc; 12460b57cec5SDimitry Andric serial_team->t.t_serialized = 1; 12470b57cec5SDimitry Andric serial_team->t.t_nproc = 1; 12480b57cec5SDimitry Andric serial_team->t.t_parent = this_thr->th.th_team; 12490b57cec5SDimitry Andric serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched; 12500b57cec5SDimitry Andric this_thr->th.th_team = serial_team; 12510b57cec5SDimitry Andric serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid, 12540b57cec5SDimitry Andric this_thr->th.th_current_task)); 12550b57cec5SDimitry Andric KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); 12560b57cec5SDimitry Andric this_thr->th.th_current_task->td_flags.executing = 0; 12570b57cec5SDimitry Andric 
12580b57cec5SDimitry Andric __kmp_push_current_task_to_thread(this_thr, serial_team, 0); 12590b57cec5SDimitry Andric 12600b57cec5SDimitry Andric /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an 12610b57cec5SDimitry Andric implicit task for each serialized task represented by 12620b57cec5SDimitry Andric team->t.t_serialized? */ 12630b57cec5SDimitry Andric copy_icvs(&this_thr->th.th_current_task->td_icvs, 12640b57cec5SDimitry Andric &this_thr->th.th_current_task->td_parent->td_icvs); 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 12670b57cec5SDimitry Andric // level 12680b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 12690b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 12700b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 12710b57cec5SDimitry Andric } 12720b57cec5SDimitry Andric 12730b57cec5SDimitry Andric if (__kmp_nested_proc_bind.used && 12740b57cec5SDimitry Andric (level + 1 < __kmp_nested_proc_bind.used)) { 12750b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.proc_bind = 12760b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[level + 1]; 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric #if USE_DEBUGGER 12800b57cec5SDimitry Andric serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. 
12810b57cec5SDimitry Andric #endif 12820b57cec5SDimitry Andric this_thr->th.th_info.ds.ds_tid = 0; 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric /* set thread cache values */ 12850b57cec5SDimitry Andric this_thr->th.th_team_nproc = 1; 12860b57cec5SDimitry Andric this_thr->th.th_team_master = this_thr; 12870b57cec5SDimitry Andric this_thr->th.th_team_serialized = 1; 12880b57cec5SDimitry Andric 12890b57cec5SDimitry Andric serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; 12900b57cec5SDimitry Andric serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; 12910b57cec5SDimitry Andric serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save 12920b57cec5SDimitry Andric 12930b57cec5SDimitry Andric propagateFPControl(serial_team); 12940b57cec5SDimitry Andric 12950b57cec5SDimitry Andric /* check if we need to allocate dispatch buffers stack */ 12960b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 12970b57cec5SDimitry Andric if (!serial_team->t.t_dispatch->th_disp_buffer) { 12980b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = 12990b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 13000b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 13010b57cec5SDimitry Andric } 13020b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric KMP_MB(); 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric } else { 13070b57cec5SDimitry Andric /* this serialized team is already being used, 13080b57cec5SDimitry Andric * that's fine, just add another nested level */ 13090b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 13100b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads); 13110b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 13120b57cec5SDimitry Andric ++serial_team->t.t_serialized; 13130b57cec5SDimitry Andric 
this_thr->th.th_team_serialized = serial_team->t.t_serialized; 13140b57cec5SDimitry Andric 13150b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 13160b57cec5SDimitry Andric int level = this_thr->th.th_team->t.t_level; 13170b57cec5SDimitry Andric // Thread value exists in the nested nthreads array for the next nested 13180b57cec5SDimitry Andric // level 13190b57cec5SDimitry Andric if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 13200b57cec5SDimitry Andric this_thr->th.th_current_task->td_icvs.nproc = 13210b57cec5SDimitry Andric __kmp_nested_nth.nth[level + 1]; 13220b57cec5SDimitry Andric } 13230b57cec5SDimitry Andric serial_team->t.t_level++; 13240b57cec5SDimitry Andric KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " 13250b57cec5SDimitry Andric "of serial team %p to %d\n", 13260b57cec5SDimitry Andric global_tid, serial_team, serial_team->t.t_level)); 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric /* allocate/push dispatch buffers stack */ 13290b57cec5SDimitry Andric KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 13300b57cec5SDimitry Andric { 13310b57cec5SDimitry Andric dispatch_private_info_t *disp_buffer = 13320b57cec5SDimitry Andric (dispatch_private_info_t *)__kmp_allocate( 13330b57cec5SDimitry Andric sizeof(dispatch_private_info_t)); 13340b57cec5SDimitry Andric disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; 13350b57cec5SDimitry Andric serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; 13360b57cec5SDimitry Andric } 13370b57cec5SDimitry Andric this_thr->th.th_dispatch = serial_team->t.t_dispatch; 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric KMP_MB(); 13400b57cec5SDimitry Andric } 13410b57cec5SDimitry Andric KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric // Perform the display affinity functionality for 13440b57cec5SDimitry Andric // serialized parallel regions 
13450b57cec5SDimitry Andric if (__kmp_display_affinity) { 13460b57cec5SDimitry Andric if (this_thr->th.th_prev_level != serial_team->t.t_level || 13470b57cec5SDimitry Andric this_thr->th.th_prev_num_threads != 1) { 13480b57cec5SDimitry Andric // NULL means use the affinity-format-var ICV 13490b57cec5SDimitry Andric __kmp_aux_display_affinity(global_tid, NULL); 13500b57cec5SDimitry Andric this_thr->th.th_prev_level = serial_team->t.t_level; 13510b57cec5SDimitry Andric this_thr->th.th_prev_num_threads = 1; 13520b57cec5SDimitry Andric } 13530b57cec5SDimitry Andric } 13540b57cec5SDimitry Andric 13550b57cec5SDimitry Andric if (__kmp_env_consistency_check) 13560b57cec5SDimitry Andric __kmp_push_parallel(global_tid, NULL); 13570b57cec5SDimitry Andric #if OMPT_SUPPORT 13580b57cec5SDimitry Andric serial_team->t.ompt_team_info.master_return_address = codeptr; 13590b57cec5SDimitry Andric if (ompt_enabled.enabled && 13600b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 13610b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 13640b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, 13650b57cec5SDimitry Andric &ompt_parallel_data, codeptr); 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); 13680b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric /* OMPT implicit task begin */ 13710b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(this_thr); 13720b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 13730b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 13740b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), 13750b57cec5SDimitry Andric OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 13760b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(this_thr) 13770b57cec5SDimitry Andric ->thread_num = __kmp_tid_from_gtid(global_tid); 13780b57cec5SDimitry Andric } 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric /* OMPT state */ 13810b57cec5SDimitry Andric this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 13820b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 13830b57cec5SDimitry Andric } 13840b57cec5SDimitry Andric #endif 13850b57cec5SDimitry Andric } 13860b57cec5SDimitry Andric 13870b57cec5SDimitry Andric /* most of the work for a fork */ 13880b57cec5SDimitry Andric /* return true if we really went parallel, false if serialized */ 13890b57cec5SDimitry Andric int __kmp_fork_call(ident_t *loc, int gtid, 13900b57cec5SDimitry Andric enum fork_context_e call_context, // Intel, GNU, ... 
13910b57cec5SDimitry Andric kmp_int32 argc, microtask_t microtask, launch_t invoker, 13920b57cec5SDimitry Andric /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 13930b57cec5SDimitry Andric #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 13940b57cec5SDimitry Andric va_list *ap 13950b57cec5SDimitry Andric #else 13960b57cec5SDimitry Andric va_list ap 13970b57cec5SDimitry Andric #endif 13980b57cec5SDimitry Andric ) { 13990b57cec5SDimitry Andric void **argv; 14000b57cec5SDimitry Andric int i; 14010b57cec5SDimitry Andric int master_tid; 14020b57cec5SDimitry Andric int master_this_cons; 14030b57cec5SDimitry Andric kmp_team_t *team; 14040b57cec5SDimitry Andric kmp_team_t *parent_team; 14050b57cec5SDimitry Andric kmp_info_t *master_th; 14060b57cec5SDimitry Andric kmp_root_t *root; 14070b57cec5SDimitry Andric int nthreads; 14080b57cec5SDimitry Andric int master_active; 14090b57cec5SDimitry Andric int master_set_numthreads; 14100b57cec5SDimitry Andric int level; 14110b57cec5SDimitry Andric int active_level; 14120b57cec5SDimitry Andric int teams_level; 14130b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 14140b57cec5SDimitry Andric kmp_hot_team_ptr_t **p_hot_teams; 14150b57cec5SDimitry Andric #endif 14160b57cec5SDimitry Andric { // KMP_TIME_BLOCK 14170b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 14180b57cec5SDimitry Andric KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 14190b57cec5SDimitry Andric 14200b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 14210b57cec5SDimitry Andric if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 14220b57cec5SDimitry Andric /* Some systems prefer the stack for the root thread(s) to start with */ 14230b57cec5SDimitry Andric /* some gap from the parent stack to prevent false sharing. 
*/ 14240b57cec5SDimitry Andric void *dummy = KMP_ALLOCA(__kmp_stkpadding); 14250b57cec5SDimitry Andric /* These 2 lines below are so this does not get optimized out */ 14260b57cec5SDimitry Andric if (__kmp_stkpadding > KMP_MAX_STKPADDING) 14270b57cec5SDimitry Andric __kmp_stkpadding += (short)((kmp_int64)dummy); 14280b57cec5SDimitry Andric } 14290b57cec5SDimitry Andric 14300b57cec5SDimitry Andric /* initialize if needed */ 14310b57cec5SDimitry Andric KMP_DEBUG_ASSERT( 14320b57cec5SDimitry Andric __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 14330b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) 14340b57cec5SDimitry Andric __kmp_parallel_initialize(); 14350b57cec5SDimitry Andric __kmp_resume_if_soft_paused(); 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andric /* setup current data */ 14380b57cec5SDimitry Andric master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with 14390b57cec5SDimitry Andric // shutdown 14400b57cec5SDimitry Andric parent_team = master_th->th.th_team; 14410b57cec5SDimitry Andric master_tid = master_th->th.th_info.ds.ds_tid; 14420b57cec5SDimitry Andric master_this_cons = master_th->th.th_local.this_construct; 14430b57cec5SDimitry Andric root = master_th->th.th_root; 14440b57cec5SDimitry Andric master_active = root->r.r_active; 14450b57cec5SDimitry Andric master_set_numthreads = master_th->th.th_set_nproc; 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric #if OMPT_SUPPORT 14480b57cec5SDimitry Andric ompt_data_t ompt_parallel_data = ompt_data_none; 14490b57cec5SDimitry Andric ompt_data_t *parent_task_data; 14500b57cec5SDimitry Andric ompt_frame_t *ompt_frame; 14510b57cec5SDimitry Andric ompt_data_t *implicit_task_data; 14520b57cec5SDimitry Andric void *return_address = NULL; 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14550b57cec5SDimitry Andric __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, 14560b57cec5SDimitry Andric NULL, 
NULL); 14570b57cec5SDimitry Andric return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); 14580b57cec5SDimitry Andric } 14590b57cec5SDimitry Andric #endif 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric // Nested level will be an index in the nested nthreads array 14620b57cec5SDimitry Andric level = parent_team->t.t_level; 14630b57cec5SDimitry Andric // used to launch non-serial teams even if nested is not allowed 14640b57cec5SDimitry Andric active_level = parent_team->t.t_active_level; 14650b57cec5SDimitry Andric // needed to check nesting inside the teams 14660b57cec5SDimitry Andric teams_level = master_th->th.th_teams_level; 14670b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 14680b57cec5SDimitry Andric p_hot_teams = &master_th->th.th_hot_teams; 14690b57cec5SDimitry Andric if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 14700b57cec5SDimitry Andric *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 14710b57cec5SDimitry Andric sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 14720b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 14730b57cec5SDimitry Andric // it is either actual or not needed (when active_level > 0) 14740b57cec5SDimitry Andric (*p_hot_teams)[0].hot_team_nth = 1; 14750b57cec5SDimitry Andric } 14760b57cec5SDimitry Andric #endif 14770b57cec5SDimitry Andric 14780b57cec5SDimitry Andric #if OMPT_SUPPORT 14790b57cec5SDimitry Andric if (ompt_enabled.enabled) { 14800b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_begin) { 14810b57cec5SDimitry Andric int team_size = master_set_numthreads 14820b57cec5SDimitry Andric ? master_set_numthreads 14830b57cec5SDimitry Andric : get__nproc_2(parent_team, master_tid); 1484489b1cf2SDimitry Andric int flags = OMPT_INVOKER(call_context) | 1485489b1cf2SDimitry Andric ((microtask == (microtask_t)__kmp_teams_master) 1486489b1cf2SDimitry Andric ? 
ompt_parallel_league 1487489b1cf2SDimitry Andric : ompt_parallel_team); 14880b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( 1489489b1cf2SDimitry Andric parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, 1490489b1cf2SDimitry Andric return_address); 14910b57cec5SDimitry Andric } 14920b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 14930b57cec5SDimitry Andric } 14940b57cec5SDimitry Andric #endif 14950b57cec5SDimitry Andric 14960b57cec5SDimitry Andric master_th->th.th_ident = loc; 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric if (master_th->th.th_teams_microtask && ap && 14990b57cec5SDimitry Andric microtask != (microtask_t)__kmp_teams_master && level == teams_level) { 15000b57cec5SDimitry Andric // AC: This is start of parallel that is nested inside teams construct. 15010b57cec5SDimitry Andric // The team is actual (hot), all workers are ready at the fork barrier. 15020b57cec5SDimitry Andric // No lock needed to initialize the team a bit, then free workers. 
15030b57cec5SDimitry Andric parent_team->t.t_ident = loc; 15040b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, parent_team, TRUE); 15050b57cec5SDimitry Andric parent_team->t.t_argc = argc; 15060b57cec5SDimitry Andric argv = (void **)parent_team->t.t_argv; 15070b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 15080b57cec5SDimitry Andric /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 15090b57cec5SDimitry Andric #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 15100b57cec5SDimitry Andric *argv++ = va_arg(*ap, void *); 15110b57cec5SDimitry Andric #else 15120b57cec5SDimitry Andric *argv++ = va_arg(ap, void *); 15130b57cec5SDimitry Andric #endif 15140b57cec5SDimitry Andric // Increment our nested depth levels, but not increase the serialization 15150b57cec5SDimitry Andric if (parent_team == master_th->th.th_serial_team) { 15160b57cec5SDimitry Andric // AC: we are in serialized parallel 15170b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 15180b57cec5SDimitry Andric KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1519489b1cf2SDimitry Andric 15200b57cec5SDimitry Andric #if OMPT_SUPPORT 15210b57cec5SDimitry Andric void *dummy; 1522489b1cf2SDimitry Andric void **exit_frame_p; 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 15250b57cec5SDimitry Andric 15260b57cec5SDimitry Andric if (ompt_enabled.enabled) { 15270b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 15280b57cec5SDimitry Andric &ompt_parallel_data, return_address); 1529489b1cf2SDimitry Andric exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); 15300b57cec5SDimitry Andric 15310b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 15320b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 15330b57cec5SDimitry Andric 15340b57cec5SDimitry Andric /* OMPT implicit task begin */ 15350b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 15360b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 15370b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th) 15380b57cec5SDimitry Andric ->thread_num = __kmp_tid_from_gtid(gtid); 1539489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1540489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1541489b1cf2SDimitry Andric implicit_task_data, 1, 1542489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric /* OMPT state */ 15460b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 15470b57cec5SDimitry Andric } else { 1548489b1cf2SDimitry Andric exit_frame_p = &dummy; 15490b57cec5SDimitry Andric } 15500b57cec5SDimitry Andric #endif 1551489b1cf2SDimitry Andric // AC: need to decrement t_serialized for enquiry functions to work 1552489b1cf2SDimitry Andric // correctly, will restore at join time 1553489b1cf2SDimitry Andric parent_team->t.t_serialized--; 15540b57cec5SDimitry Andric 15550b57cec5SDimitry Andric { 15560b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 15570b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 15580b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 15590b57cec5SDimitry Andric #if OMPT_SUPPORT 15600b57cec5SDimitry Andric , 1561489b1cf2SDimitry Andric exit_frame_p 15620b57cec5SDimitry Andric #endif 15630b57cec5SDimitry Andric ); 15640b57cec5SDimitry Andric } 15650b57cec5SDimitry Andric 15660b57cec5SDimitry Andric #if OMPT_SUPPORT 15670b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1568489b1cf2SDimitry Andric *exit_frame_p = NULL; 15690b57cec5SDimitry Andric 
OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; 15700b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 15710b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 15720b57cec5SDimitry Andric ompt_scope_end, NULL, implicit_task_data, 1, 1573489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); 15740b57cec5SDimitry Andric } 1575489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 15760b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 15770b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 15780b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1579489b1cf2SDimitry Andric &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), 1580489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1581489b1cf2SDimitry Andric return_address); 15820b57cec5SDimitry Andric } 15830b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 15840b57cec5SDimitry Andric } 15850b57cec5SDimitry Andric #endif 15860b57cec5SDimitry Andric return TRUE; 15870b57cec5SDimitry Andric } 15880b57cec5SDimitry Andric 15890b57cec5SDimitry Andric parent_team->t.t_pkfn = microtask; 15900b57cec5SDimitry Andric parent_team->t.t_invoke = invoker; 15910b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 15920b57cec5SDimitry Andric parent_team->t.t_active_level++; 15930b57cec5SDimitry Andric parent_team->t.t_level++; 15940b57cec5SDimitry Andric parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save 15950b57cec5SDimitry Andric 1596489b1cf2SDimitry Andric #if OMPT_SUPPORT 1597489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1598489b1cf2SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 1599489b1cf2SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1600489b1cf2SDimitry Andric &ompt_parallel_data, return_address); 1601489b1cf2SDimitry Andric 
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); 1602489b1cf2SDimitry Andric } 1603489b1cf2SDimitry Andric #endif 1604489b1cf2SDimitry Andric 16050b57cec5SDimitry Andric /* Change number of threads in the team if requested */ 16060b57cec5SDimitry Andric if (master_set_numthreads) { // The parallel has num_threads clause 16070b57cec5SDimitry Andric if (master_set_numthreads < master_th->th.th_teams_size.nth) { 16080b57cec5SDimitry Andric // AC: only can reduce number of threads dynamically, can't increase 16090b57cec5SDimitry Andric kmp_info_t **other_threads = parent_team->t.t_threads; 16100b57cec5SDimitry Andric parent_team->t.t_nproc = master_set_numthreads; 16110b57cec5SDimitry Andric for (i = 0; i < master_set_numthreads; ++i) { 16120b57cec5SDimitry Andric other_threads[i]->th.th_team_nproc = master_set_numthreads; 16130b57cec5SDimitry Andric } 16140b57cec5SDimitry Andric // Keep extra threads hot in the team for possible next parallels 16150b57cec5SDimitry Andric } 16160b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 16170b57cec5SDimitry Andric } 16180b57cec5SDimitry Andric 16190b57cec5SDimitry Andric #if USE_DEBUGGER 16200b57cec5SDimitry Andric if (__kmp_debugging) { // Let debugger override number of threads. 
16210b57cec5SDimitry Andric int nth = __kmp_omp_num_threads(loc); 16220b57cec5SDimitry Andric if (nth > 0) { // 0 means debugger doesn't want to change num threads 16230b57cec5SDimitry Andric master_set_numthreads = nth; 16240b57cec5SDimitry Andric } 16250b57cec5SDimitry Andric } 16260b57cec5SDimitry Andric #endif 16270b57cec5SDimitry Andric 16280b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " 16290b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16300b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16310b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, parent_team); 16320b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " 16330b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 16340b57cec5SDimitry Andric root, parent_team, master_th, gtid)); 16350b57cec5SDimitry Andric 16360b57cec5SDimitry Andric /* Invoke microtask for MASTER thread */ 16370b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 16380b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16390b57cec5SDimitry Andric 16400b57cec5SDimitry Andric if (!parent_team->t.t_invoke(gtid)) { 16410b57cec5SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); 16420b57cec5SDimitry Andric } 16430b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 16440b57cec5SDimitry Andric parent_team->t.t_id, parent_team->t.t_pkfn)); 16450b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 16460b57cec5SDimitry Andric 16470b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 16480b57cec5SDimitry Andric 16490b57cec5SDimitry Andric return TRUE; 16500b57cec5SDimitry Andric } // Parallel closely nested in teams construct 16510b57cec5SDimitry Andric 16520b57cec5SDimitry Andric #if KMP_DEBUG 16530b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 16540b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 16550b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 16560b57cec5SDimitry Andric } 16570b57cec5SDimitry Andric #endif 16580b57cec5SDimitry Andric 16590b57cec5SDimitry Andric if (parent_team->t.t_active_level >= 16600b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels) { 16610b57cec5SDimitry Andric nthreads = 1; 16620b57cec5SDimitry Andric } else { 16630b57cec5SDimitry Andric int enter_teams = ((ap == NULL && active_level == 0) || 16640b57cec5SDimitry Andric (ap && teams_level > 0 && teams_level == level)); 16650b57cec5SDimitry Andric nthreads = 16660b57cec5SDimitry Andric master_set_numthreads 16670b57cec5SDimitry Andric ? master_set_numthreads 16680b57cec5SDimitry Andric : get__nproc_2( 16690b57cec5SDimitry Andric parent_team, 16700b57cec5SDimitry Andric master_tid); // TODO: get nproc directly from current task 16710b57cec5SDimitry Andric 16720b57cec5SDimitry Andric // Check if we need to take forkjoin lock? (no need for serialized 16730b57cec5SDimitry Andric // parallel out of teams construct). This code moved here from 16740b57cec5SDimitry Andric // __kmp_reserve_threads() to speedup nested serialized parallels. 
16750b57cec5SDimitry Andric if (nthreads > 1) { 16760b57cec5SDimitry Andric if ((get__max_active_levels(master_th) == 1 && 16770b57cec5SDimitry Andric (root->r.r_in_parallel && !enter_teams)) || 16780b57cec5SDimitry Andric (__kmp_library == library_serial)) { 16790b57cec5SDimitry Andric KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" 16800b57cec5SDimitry Andric " threads\n", 16810b57cec5SDimitry Andric gtid, nthreads)); 16820b57cec5SDimitry Andric nthreads = 1; 16830b57cec5SDimitry Andric } 16840b57cec5SDimitry Andric } 16850b57cec5SDimitry Andric if (nthreads > 1) { 16860b57cec5SDimitry Andric /* determine how many new threads we can use */ 16870b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 16880b57cec5SDimitry Andric /* AC: If we execute teams from parallel region (on host), then teams 16890b57cec5SDimitry Andric should be created but each can only have 1 thread if nesting is 16900b57cec5SDimitry Andric disabled. If teams called from serial region, then teams and their 16910b57cec5SDimitry Andric threads should be created regardless of the nesting setting. 
*/ 16920b57cec5SDimitry Andric nthreads = __kmp_reserve_threads(root, parent_team, master_tid, 16930b57cec5SDimitry Andric nthreads, enter_teams); 16940b57cec5SDimitry Andric if (nthreads == 1) { 16950b57cec5SDimitry Andric // Free lock for single thread execution here; for multi-thread 16960b57cec5SDimitry Andric // execution it will be freed later after team of threads created 16970b57cec5SDimitry Andric // and initialized 16980b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 16990b57cec5SDimitry Andric } 17000b57cec5SDimitry Andric } 17010b57cec5SDimitry Andric } 17020b57cec5SDimitry Andric KMP_DEBUG_ASSERT(nthreads > 0); 17030b57cec5SDimitry Andric 17040b57cec5SDimitry Andric // If we temporarily changed the set number of threads then restore it now 17050b57cec5SDimitry Andric master_th->th.th_set_nproc = 0; 17060b57cec5SDimitry Andric 17070b57cec5SDimitry Andric /* create a serialized parallel region? */ 17080b57cec5SDimitry Andric if (nthreads == 1) { 17090b57cec5SDimitry Andric /* josh todo: hypothetical question: what do we do for OS X*? 
*/ 17100b57cec5SDimitry Andric #if KMP_OS_LINUX && \ 17110b57cec5SDimitry Andric (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 17120b57cec5SDimitry Andric void *args[argc]; 17130b57cec5SDimitry Andric #else 17140b57cec5SDimitry Andric void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 17150b57cec5SDimitry Andric #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 17160b57cec5SDimitry Andric KMP_ARCH_AARCH64) */ 17170b57cec5SDimitry Andric 17180b57cec5SDimitry Andric KA_TRACE(20, 17190b57cec5SDimitry Andric ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric __kmpc_serialized_parallel(loc, gtid); 17220b57cec5SDimitry Andric 17230b57cec5SDimitry Andric if (call_context == fork_context_intel) { 17240b57cec5SDimitry Andric /* TODO this sucks, use the compiler itself to pass args! :) */ 17250b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_ident = loc; 17260b57cec5SDimitry Andric if (!ap) { 17270b57cec5SDimitry Andric // revert change made in __kmpc_serialized_parallel() 17280b57cec5SDimitry Andric master_th->th.th_serial_team->t.t_level--; 17290b57cec5SDimitry Andric // Get args from parent team for teams construct 17300b57cec5SDimitry Andric 17310b57cec5SDimitry Andric #if OMPT_SUPPORT 17320b57cec5SDimitry Andric void *dummy; 1733489b1cf2SDimitry Andric void **exit_frame_p; 17340b57cec5SDimitry Andric ompt_task_info_t *task_info; 17350b57cec5SDimitry Andric 17360b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric if (ompt_enabled.enabled) { 17390b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 17400b57cec5SDimitry Andric &ompt_parallel_data, return_address); 17410b57cec5SDimitry Andric 17420b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 17430b57cec5SDimitry Andric // don't use lw_taskteam after linking. 
content was swaped 17440b57cec5SDimitry Andric 17450b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1746489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 17470b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 17480b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th) 17490b57cec5SDimitry Andric ->thread_num = __kmp_tid_from_gtid(gtid); 1750489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1751489b1cf2SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1752489b1cf2SDimitry Andric &(task_info->task_data), 1, 1753489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1754489b1cf2SDimitry Andric ompt_task_implicit); 17550b57cec5SDimitry Andric } 17560b57cec5SDimitry Andric 17570b57cec5SDimitry Andric /* OMPT state */ 17580b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 17590b57cec5SDimitry Andric } else { 1760489b1cf2SDimitry Andric exit_frame_p = &dummy; 17610b57cec5SDimitry Andric } 17620b57cec5SDimitry Andric #endif 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric { 17650b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 17660b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 17670b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, 17680b57cec5SDimitry Andric parent_team->t.t_argv 17690b57cec5SDimitry Andric #if OMPT_SUPPORT 17700b57cec5SDimitry Andric , 1771489b1cf2SDimitry Andric exit_frame_p 17720b57cec5SDimitry Andric #endif 17730b57cec5SDimitry Andric ); 17740b57cec5SDimitry Andric } 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric #if OMPT_SUPPORT 17770b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1778489b1cf2SDimitry Andric *exit_frame_p = NULL; 17790b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 17800b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 17810b57cec5SDimitry Andric 
ompt_scope_end, NULL, &(task_info->task_data), 1, 1782489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1783489b1cf2SDimitry Andric ompt_task_implicit); 17840b57cec5SDimitry Andric } 1785489b1cf2SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 17860b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 17870b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 17880b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1789489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1790489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1791489b1cf2SDimitry Andric return_address); 17920b57cec5SDimitry Andric } 17930b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 17940b57cec5SDimitry Andric } 17950b57cec5SDimitry Andric #endif 17960b57cec5SDimitry Andric } else if (microtask == (microtask_t)__kmp_teams_master) { 17970b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_team == 17980b57cec5SDimitry Andric master_th->th.th_serial_team); 17990b57cec5SDimitry Andric team = master_th->th.th_team; 18000b57cec5SDimitry Andric // team->t.t_pkfn = microtask; 18010b57cec5SDimitry Andric team->t.t_invoke = invoker; 18020b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 18030b57cec5SDimitry Andric team->t.t_argc = argc; 18040b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 18050b57cec5SDimitry Andric if (ap) { 18060b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 18070b57cec5SDimitry Andric // TODO: revert workaround for Intel(R) 64 tracker #96 18080b57cec5SDimitry Andric #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 18090b57cec5SDimitry Andric *argv++ = va_arg(*ap, void *); 18100b57cec5SDimitry Andric #else 18110b57cec5SDimitry Andric *argv++ = va_arg(ap, void *); 18120b57cec5SDimitry Andric #endif 18130b57cec5SDimitry Andric } else { 18140b57cec5SDimitry Andric for (i = 0; i 
< argc; ++i) 18150b57cec5SDimitry Andric // Get args from parent team for teams construct 18160b57cec5SDimitry Andric argv[i] = parent_team->t.t_argv[i]; 18170b57cec5SDimitry Andric } 18180b57cec5SDimitry Andric // AC: revert change made in __kmpc_serialized_parallel() 18190b57cec5SDimitry Andric // because initial code in teams should have level=0 18200b57cec5SDimitry Andric team->t.t_level--; 18210b57cec5SDimitry Andric // AC: call special invoker for outer "parallel" of teams construct 18220b57cec5SDimitry Andric invoker(gtid); 1823489b1cf2SDimitry Andric #if OMPT_SUPPORT 1824489b1cf2SDimitry Andric if (ompt_enabled.enabled) { 1825489b1cf2SDimitry Andric ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); 1826489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 1827489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 1828489b1cf2SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 0, 1829489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); 1830489b1cf2SDimitry Andric } 1831489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 1832489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 1833489b1cf2SDimitry Andric &ompt_parallel_data, parent_task_data, 1834489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_league, 1835489b1cf2SDimitry Andric return_address); 1836489b1cf2SDimitry Andric } 1837489b1cf2SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 1838489b1cf2SDimitry Andric } 1839489b1cf2SDimitry Andric #endif 18400b57cec5SDimitry Andric } else { 18410b57cec5SDimitry Andric argv = args; 18420b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) 18430b57cec5SDimitry Andric // TODO: revert workaround for Intel(R) 64 tracker #96 18440b57cec5SDimitry Andric #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 18450b57cec5SDimitry Andric *argv++ = va_arg(*ap, 
void *); 18460b57cec5SDimitry Andric #else 18470b57cec5SDimitry Andric *argv++ = va_arg(ap, void *); 18480b57cec5SDimitry Andric #endif 18490b57cec5SDimitry Andric KMP_MB(); 18500b57cec5SDimitry Andric 18510b57cec5SDimitry Andric #if OMPT_SUPPORT 18520b57cec5SDimitry Andric void *dummy; 1853489b1cf2SDimitry Andric void **exit_frame_p; 18540b57cec5SDimitry Andric ompt_task_info_t *task_info; 18550b57cec5SDimitry Andric 18560b57cec5SDimitry Andric ompt_lw_taskteam_t lw_taskteam; 18570b57cec5SDimitry Andric 18580b57cec5SDimitry Andric if (ompt_enabled.enabled) { 18590b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 18600b57cec5SDimitry Andric &ompt_parallel_data, return_address); 18610b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); 18620b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 18630b57cec5SDimitry Andric task_info = OMPT_CUR_TASK_INFO(master_th); 1864489b1cf2SDimitry Andric exit_frame_p = &(task_info->frame.exit_frame.ptr); 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric /* OMPT implicit task begin */ 18670b57cec5SDimitry Andric implicit_task_data = OMPT_CUR_TASK_DATA(master_th); 18680b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 18690b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 18700b57cec5SDimitry Andric ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), 1871489b1cf2SDimitry Andric implicit_task_data, 1, __kmp_tid_from_gtid(gtid), 1872489b1cf2SDimitry Andric ompt_task_implicit); 18730b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(master_th) 18740b57cec5SDimitry Andric ->thread_num = __kmp_tid_from_gtid(gtid); 18750b57cec5SDimitry Andric } 18760b57cec5SDimitry Andric 18770b57cec5SDimitry Andric /* OMPT state */ 18780b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 18790b57cec5SDimitry Andric } else { 1880489b1cf2SDimitry Andric exit_frame_p = &dummy; 18810b57cec5SDimitry 
Andric } 18820b57cec5SDimitry Andric #endif 18830b57cec5SDimitry Andric 18840b57cec5SDimitry Andric { 18850b57cec5SDimitry Andric KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 18860b57cec5SDimitry Andric KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 18870b57cec5SDimitry Andric __kmp_invoke_microtask(microtask, gtid, 0, argc, args 18880b57cec5SDimitry Andric #if OMPT_SUPPORT 18890b57cec5SDimitry Andric , 1890489b1cf2SDimitry Andric exit_frame_p 18910b57cec5SDimitry Andric #endif 18920b57cec5SDimitry Andric ); 18930b57cec5SDimitry Andric } 18940b57cec5SDimitry Andric 18950b57cec5SDimitry Andric #if OMPT_SUPPORT 18960b57cec5SDimitry Andric if (ompt_enabled.enabled) { 1897489b1cf2SDimitry Andric *exit_frame_p = NULL; 18980b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 18990b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 19000b57cec5SDimitry Andric ompt_scope_end, NULL, &(task_info->task_data), 1, 1901489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(master_th)->thread_num, 1902489b1cf2SDimitry Andric ompt_task_implicit); 19030b57cec5SDimitry Andric } 19040b57cec5SDimitry Andric 19050b57cec5SDimitry Andric ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); 19060b57cec5SDimitry Andric __ompt_lw_taskteam_unlink(master_th); 19070b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_parallel_end) { 19080b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 19090b57cec5SDimitry Andric &ompt_parallel_data, parent_task_data, 1910489b1cf2SDimitry Andric OMPT_INVOKER(call_context) | ompt_parallel_team, 1911489b1cf2SDimitry Andric return_address); 19120b57cec5SDimitry Andric } 19130b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_overhead; 19140b57cec5SDimitry Andric } 19150b57cec5SDimitry Andric #endif 19160b57cec5SDimitry Andric } 19170b57cec5SDimitry Andric } else if (call_context == fork_context_gnu) { 19180b57cec5SDimitry Andric #if OMPT_SUPPORT 19190b57cec5SDimitry Andric 
ompt_lw_taskteam_t lwt; 19200b57cec5SDimitry Andric __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, 19210b57cec5SDimitry Andric return_address); 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric lwt.ompt_task_info.frame.exit_frame = ompt_data_none; 19240b57cec5SDimitry Andric __ompt_lw_taskteam_link(&lwt, master_th, 1); 19250b57cec5SDimitry Andric // don't use lw_taskteam after linking. content was swaped 19260b57cec5SDimitry Andric #endif 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric // we were called from GNU native code 19290b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19300b57cec5SDimitry Andric return FALSE; 19310b57cec5SDimitry Andric } else { 19320b57cec5SDimitry Andric KMP_ASSERT2(call_context < fork_context_last, 19330b57cec5SDimitry Andric "__kmp_fork_call: unknown fork_context parameter"); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 19370b57cec5SDimitry Andric KMP_MB(); 19380b57cec5SDimitry Andric return FALSE; 19390b57cec5SDimitry Andric } // if (nthreads == 1) 19400b57cec5SDimitry Andric 19410b57cec5SDimitry Andric // GEH: only modify the executing flag in the case when not serialized 19420b57cec5SDimitry Andric // serialized case is handled in kmpc_serialized_parallel 19430b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 19440b57cec5SDimitry Andric "curtask=%p, curtask_max_aclevel=%d\n", 19450b57cec5SDimitry Andric parent_team->t.t_active_level, master_th, 19460b57cec5SDimitry Andric master_th->th.th_current_task, 19470b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.max_active_levels)); 19480b57cec5SDimitry Andric // TODO: GEH - cannot do this assertion because root thread not set up as 19490b57cec5SDimitry Andric // executing 19500b57cec5SDimitry Andric // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing 
== 1 ); 19510b57cec5SDimitry Andric master_th->th.th_current_task->td_flags.executing = 0; 19520b57cec5SDimitry Andric 19530b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 19540b57cec5SDimitry Andric /* Increment our nested depth level */ 19550b57cec5SDimitry Andric KMP_ATOMIC_INC(&root->r.r_in_parallel); 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric // See if we need to make a copy of the ICVs. 19590b57cec5SDimitry Andric int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 19600b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_nth.used) && 19610b57cec5SDimitry Andric (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 19620b57cec5SDimitry Andric nthreads_icv = __kmp_nested_nth.nth[level + 1]; 19630b57cec5SDimitry Andric } else { 19640b57cec5SDimitry Andric nthreads_icv = 0; // don't update 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric // Figure out the proc_bind_policy for the new team. 19680b57cec5SDimitry Andric kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 19690b57cec5SDimitry Andric kmp_proc_bind_t proc_bind_icv = 19700b57cec5SDimitry Andric proc_bind_default; // proc_bind_default means don't update 19710b57cec5SDimitry Andric if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 19720b57cec5SDimitry Andric proc_bind = proc_bind_false; 19730b57cec5SDimitry Andric } else { 19740b57cec5SDimitry Andric if (proc_bind == proc_bind_default) { 19750b57cec5SDimitry Andric // No proc_bind clause specified; use current proc-bind-var for this 19760b57cec5SDimitry Andric // parallel region 19770b57cec5SDimitry Andric proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 19780b57cec5SDimitry Andric } 19790b57cec5SDimitry Andric /* else: The proc_bind policy was specified explicitly on parallel clause. 
19800b57cec5SDimitry Andric This overrides proc-bind-var for this parallel region, but does not 19810b57cec5SDimitry Andric change proc-bind-var. */ 19820b57cec5SDimitry Andric // Figure the value of proc-bind-var for the child threads. 19830b57cec5SDimitry Andric if ((level + 1 < __kmp_nested_proc_bind.used) && 19840b57cec5SDimitry Andric (__kmp_nested_proc_bind.bind_types[level + 1] != 19850b57cec5SDimitry Andric master_th->th.th_current_task->td_icvs.proc_bind)) { 19860b57cec5SDimitry Andric proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 19870b57cec5SDimitry Andric } 19880b57cec5SDimitry Andric } 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric // Reset for next parallel region 19910b57cec5SDimitry Andric master_th->th.th_set_proc_bind = proc_bind_default; 19920b57cec5SDimitry Andric 19930b57cec5SDimitry Andric if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) { 19940b57cec5SDimitry Andric kmp_internal_control_t new_icvs; 19950b57cec5SDimitry Andric copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 19960b57cec5SDimitry Andric new_icvs.next = NULL; 19970b57cec5SDimitry Andric if (nthreads_icv > 0) { 19980b57cec5SDimitry Andric new_icvs.nproc = nthreads_icv; 19990b57cec5SDimitry Andric } 20000b57cec5SDimitry Andric if (proc_bind_icv != proc_bind_default) { 20010b57cec5SDimitry Andric new_icvs.proc_bind = proc_bind_icv; 20020b57cec5SDimitry Andric } 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric /* allocate a new parallel team */ 20050b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20060b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20070b57cec5SDimitry Andric #if OMPT_SUPPORT 20080b57cec5SDimitry Andric ompt_parallel_data, 20090b57cec5SDimitry Andric #endif 20100b57cec5SDimitry Andric proc_bind, &new_icvs, 20110b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 20120b57cec5SDimitry Andric } else { 20130b57cec5SDimitry Andric /* 
allocate a new parallel team */ 20140b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 20150b57cec5SDimitry Andric team = __kmp_allocate_team(root, nthreads, nthreads, 20160b57cec5SDimitry Andric #if OMPT_SUPPORT 20170b57cec5SDimitry Andric ompt_parallel_data, 20180b57cec5SDimitry Andric #endif 20190b57cec5SDimitry Andric proc_bind, 20200b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, 20210b57cec5SDimitry Andric argc USE_NESTED_HOT_ARG(master_th)); 20220b57cec5SDimitry Andric } 20230b57cec5SDimitry Andric KF_TRACE( 20240b57cec5SDimitry Andric 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 20250b57cec5SDimitry Andric 20260b57cec5SDimitry Andric /* setup the new team */ 20270b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 20280b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 20290b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_ident, loc); 20300b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 20310b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 20320b57cec5SDimitry Andric #if OMPT_SUPPORT 20330b57cec5SDimitry Andric KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, 20340b57cec5SDimitry Andric return_address); 20350b57cec5SDimitry Andric #endif 20360b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 20370b57cec5SDimitry Andric // TODO: parent_team->t.t_level == INT_MAX ??? 
20380b57cec5SDimitry Andric if (!master_th->th.th_teams_microtask || level > teams_level) { 20390b57cec5SDimitry Andric int new_level = parent_team->t.t_level + 1; 20400b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20410b57cec5SDimitry Andric new_level = parent_team->t.t_active_level + 1; 20420b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20430b57cec5SDimitry Andric } else { 20440b57cec5SDimitry Andric // AC: Do not increase parallel level at start of the teams construct 20450b57cec5SDimitry Andric int new_level = parent_team->t.t_level; 20460b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_level, new_level); 20470b57cec5SDimitry Andric new_level = parent_team->t.t_active_level; 20480b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 20490b57cec5SDimitry Andric } 20500b57cec5SDimitry Andric kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 20510b57cec5SDimitry Andric // set master's schedule as new run-time schedule 20520b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 20530b57cec5SDimitry Andric 20540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 20550b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); 20560b57cec5SDimitry Andric 20570b57cec5SDimitry Andric // Update the floating point rounding in the team if required. 20580b57cec5SDimitry Andric propagateFPControl(team); 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 20610b57cec5SDimitry Andric // Set master's task team to team's task team. Unless this is hot team, it 20620b57cec5SDimitry Andric // should be NULL. 
20630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_team == 20640b57cec5SDimitry Andric parent_team->t.t_task_team[master_th->th.th_task_state]); 20650b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team " 20660b57cec5SDimitry Andric "%p, new task_team %p / team %p\n", 20670b57cec5SDimitry Andric __kmp_gtid_from_thread(master_th), 20680b57cec5SDimitry Andric master_th->th.th_task_team, parent_team, 20690b57cec5SDimitry Andric team->t.t_task_team[master_th->th.th_task_state], team)); 20700b57cec5SDimitry Andric 20710b57cec5SDimitry Andric if (active_level || master_th->th.th_task_team) { 20720b57cec5SDimitry Andric // Take a memo of master's task_state 20730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 20740b57cec5SDimitry Andric if (master_th->th.th_task_state_top >= 20750b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz) { // increase size 20760b57cec5SDimitry Andric kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 20770b57cec5SDimitry Andric kmp_uint8 *old_stack, *new_stack; 20780b57cec5SDimitry Andric kmp_uint32 i; 20790b57cec5SDimitry Andric new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 20800b57cec5SDimitry Andric for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 20810b57cec5SDimitry Andric new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 20820b57cec5SDimitry Andric } 20830b57cec5SDimitry Andric for (i = master_th->th.th_task_state_stack_sz; i < new_size; 20840b57cec5SDimitry Andric ++i) { // zero-init rest of stack 20850b57cec5SDimitry Andric new_stack[i] = 0; 20860b57cec5SDimitry Andric } 20870b57cec5SDimitry Andric old_stack = master_th->th.th_task_state_memo_stack; 20880b57cec5SDimitry Andric master_th->th.th_task_state_memo_stack = new_stack; 20890b57cec5SDimitry Andric master_th->th.th_task_state_stack_sz = new_size; 20900b57cec5SDimitry Andric __kmp_free(old_stack); 20910b57cec5SDimitry Andric } 
20920b57cec5SDimitry Andric // Store master's task_state on stack 20930b57cec5SDimitry Andric master_th->th 20940b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top] = 20950b57cec5SDimitry Andric master_th->th.th_task_state; 20960b57cec5SDimitry Andric master_th->th.th_task_state_top++; 20970b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 20980b57cec5SDimitry Andric if (master_th->th.th_hot_teams && 20990b57cec5SDimitry Andric active_level < __kmp_hot_teams_max_level && 21000b57cec5SDimitry Andric team == master_th->th.th_hot_teams[active_level].hot_team) { 21010b57cec5SDimitry Andric // Restore master's nested state if nested hot team 21020b57cec5SDimitry Andric master_th->th.th_task_state = 21030b57cec5SDimitry Andric master_th->th 21040b57cec5SDimitry Andric .th_task_state_memo_stack[master_th->th.th_task_state_top]; 21050b57cec5SDimitry Andric } else { 21060b57cec5SDimitry Andric #endif 21070b57cec5SDimitry Andric master_th->th.th_task_state = 0; 21080b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 21090b57cec5SDimitry Andric } 21100b57cec5SDimitry Andric #endif 21110b57cec5SDimitry Andric } 21120b57cec5SDimitry Andric #if !KMP_NESTED_HOT_TEAMS 21130b57cec5SDimitry Andric KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 21140b57cec5SDimitry Andric (team == root->r.r_hot_team)); 21150b57cec5SDimitry Andric #endif 21160b57cec5SDimitry Andric } 21170b57cec5SDimitry Andric 21180b57cec5SDimitry Andric KA_TRACE( 21190b57cec5SDimitry Andric 20, 21200b57cec5SDimitry Andric ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 21210b57cec5SDimitry Andric gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 21220b57cec5SDimitry Andric team->t.t_nproc)); 21230b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 21240b57cec5SDimitry Andric (team->t.t_master_tid == 0 && 21250b57cec5SDimitry Andric (team->t.t_parent == root->r.r_root_team || 21260b57cec5SDimitry Andric 
team->t.t_parent->t.t_serialized))); 21270b57cec5SDimitry Andric KMP_MB(); 21280b57cec5SDimitry Andric 21290b57cec5SDimitry Andric /* now, setup the arguments */ 21300b57cec5SDimitry Andric argv = (void **)team->t.t_argv; 21310b57cec5SDimitry Andric if (ap) { 21320b57cec5SDimitry Andric for (i = argc - 1; i >= 0; --i) { 21330b57cec5SDimitry Andric // TODO: revert workaround for Intel(R) 64 tracker #96 21340b57cec5SDimitry Andric #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 21350b57cec5SDimitry Andric void *new_argv = va_arg(*ap, void *); 21360b57cec5SDimitry Andric #else 21370b57cec5SDimitry Andric void *new_argv = va_arg(ap, void *); 21380b57cec5SDimitry Andric #endif 21390b57cec5SDimitry Andric KMP_CHECK_UPDATE(*argv, new_argv); 21400b57cec5SDimitry Andric argv++; 21410b57cec5SDimitry Andric } 21420b57cec5SDimitry Andric } else { 21430b57cec5SDimitry Andric for (i = 0; i < argc; ++i) { 21440b57cec5SDimitry Andric // Get args from parent team for teams construct 21450b57cec5SDimitry Andric KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 21460b57cec5SDimitry Andric } 21470b57cec5SDimitry Andric } 21480b57cec5SDimitry Andric 21490b57cec5SDimitry Andric /* now actually fork the threads */ 21500b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 21510b57cec5SDimitry Andric if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 21520b57cec5SDimitry Andric root->r.r_active = TRUE; 21530b57cec5SDimitry Andric 21540b57cec5SDimitry Andric __kmp_fork_team_threads(root, team, master_th, gtid); 21550b57cec5SDimitry Andric __kmp_setup_icv_copy(team, nthreads, 21560b57cec5SDimitry Andric &master_th->th.th_current_task->td_icvs, loc); 21570b57cec5SDimitry Andric 21580b57cec5SDimitry Andric #if OMPT_SUPPORT 21590b57cec5SDimitry Andric master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 21600b57cec5SDimitry Andric #endif 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric 
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 21630b57cec5SDimitry Andric 21640b57cec5SDimitry Andric #if USE_ITT_BUILD 21650b57cec5SDimitry Andric if (team->t.t_active_level == 1 // only report frames at level 1 21660b57cec5SDimitry Andric && !master_th->th.th_teams_microtask) { // not in teams construct 21670b57cec5SDimitry Andric #if USE_ITT_NOTIFY 21680b57cec5SDimitry Andric if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 21690b57cec5SDimitry Andric (__kmp_forkjoin_frames_mode == 3 || 21700b57cec5SDimitry Andric __kmp_forkjoin_frames_mode == 1)) { 21710b57cec5SDimitry Andric kmp_uint64 tmp_time = 0; 21720b57cec5SDimitry Andric if (__itt_get_timestamp_ptr) 21730b57cec5SDimitry Andric tmp_time = __itt_get_timestamp(); 21740b57cec5SDimitry Andric // Internal fork - report frame begin 21750b57cec5SDimitry Andric master_th->th.th_frame_time = tmp_time; 21760b57cec5SDimitry Andric if (__kmp_forkjoin_frames_mode == 3) 21770b57cec5SDimitry Andric team->t.t_region_time = tmp_time; 21780b57cec5SDimitry Andric } else 21790b57cec5SDimitry Andric // only one notification scheme (either "submit" or "forking/joined", not both) 21800b57cec5SDimitry Andric #endif /* USE_ITT_NOTIFY */ 21810b57cec5SDimitry Andric if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 21820b57cec5SDimitry Andric __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 21830b57cec5SDimitry Andric // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer. 
21840b57cec5SDimitry Andric __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry Andric } 21870b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 21880b57cec5SDimitry Andric 21890b57cec5SDimitry Andric /* now go on and do the work */ 21900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 21910b57cec5SDimitry Andric KMP_MB(); 21920b57cec5SDimitry Andric KF_TRACE(10, 21930b57cec5SDimitry Andric ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 21940b57cec5SDimitry Andric root, team, master_th, gtid)); 21950b57cec5SDimitry Andric 21960b57cec5SDimitry Andric #if USE_ITT_BUILD 21970b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 21980b57cec5SDimitry Andric team->t.t_stack_id = 21990b57cec5SDimitry Andric __kmp_itt_stack_caller_create(); // create new stack stitching id 22000b57cec5SDimitry Andric // before entering fork barrier 22010b57cec5SDimitry Andric } 22020b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 22030b57cec5SDimitry Andric 22040b57cec5SDimitry Andric // AC: skip __kmp_internal_fork at teams construct, let only master 22050b57cec5SDimitry Andric // threads execute 22060b57cec5SDimitry Andric if (ap) { 22070b57cec5SDimitry Andric __kmp_internal_fork(loc, gtid, team); 22080b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 22090b57cec5SDimitry Andric "master_th=%p, gtid=%d\n", 22100b57cec5SDimitry Andric root, team, master_th, gtid)); 22110b57cec5SDimitry Andric } 22120b57cec5SDimitry Andric 22130b57cec5SDimitry Andric if (call_context == fork_context_gnu) { 22140b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 22150b57cec5SDimitry Andric return TRUE; 22160b57cec5SDimitry Andric } 22170b57cec5SDimitry Andric 22180b57cec5SDimitry Andric /* Invoke microtask for MASTER thread */ 22190b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", 
gtid, 22200b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 22210b57cec5SDimitry Andric } // END of timer KMP_fork_call block 22220b57cec5SDimitry Andric 22230b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22240b57cec5SDimitry Andric // If beginning a teams construct, then change thread state 22250b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 22260b57cec5SDimitry Andric if (!ap) { 22270b57cec5SDimitry Andric KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION); 22280b57cec5SDimitry Andric } 22290b57cec5SDimitry Andric #endif 22300b57cec5SDimitry Andric 22310b57cec5SDimitry Andric if (!team->t.t_invoke(gtid)) { 22320b57cec5SDimitry Andric KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); 22330b57cec5SDimitry Andric } 22340b57cec5SDimitry Andric 22350b57cec5SDimitry Andric #if KMP_STATS_ENABLED 22360b57cec5SDimitry Andric // If was beginning of a teams construct, then reset thread state 22370b57cec5SDimitry Andric if (!ap) { 22380b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 22390b57cec5SDimitry Andric } 22400b57cec5SDimitry Andric #endif 22410b57cec5SDimitry Andric 22420b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 22430b57cec5SDimitry Andric team->t.t_id, team->t.t_pkfn)); 22440b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}

#if OMPT_SUPPORT
// Reset the OMPT thread state to describe the code that runs after the join:
// serial user code if the team is serialized, parallel user code otherwise.
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

// Fire the OMPT parallel-end callback (if registered) for the region being
// joined, clear the implicit task's enter frame, and restore the thread state.
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif

// Join at the end of a parallel region: run the join barrier (unless exiting
// a teams construct), emit ITT/OMPT end-of-region events, free or shrink the
// team as appropriate, and restore the master thread's bookkeeping (dispatch,
// task team, ICV/task state) to that of the parent team.
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  void *team_microtask = (void *)team->t.t_pkfn;
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    // But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
  }

  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t
            .t_stack_id); // destroy the stack stitching id after join barrier
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      !master_th->th.th_teams_microtask) { /* not in teams construct */
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel same (hot) team
    // works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams master in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      // The implicit task of a teams master is an initial task; report size 0
      // for it per the OMPT convention used here.
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED
  master_th->th.th_def_allocator = team->t.t_def_allocator;

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally since the old team may be
     reallocated and the hierarchy appears inconsistent. it is actually safe to
     run and won't cause any bugs, but will cause those assertion failures. it's
     only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it.
*/
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      // Only push a new record when the serial nesting level changed since
      // the last save; otherwise the top of stack already covers this level.
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}

/* Changes set_nproc */
// Implements omp_set_num_threads() for the calling thread: clamps the request
// to [1, __kmp_max_nth], records it in the current task's ICVs, and — when the
// root's hot team is now larger than needed — shrinks it immediately rather
// than waiting for the next parallel region.
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Clamp the requested value to the supported range.
  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}

/* Changes max_active_levels */
// Implements omp_set_max_active_levels() for the calling thread: negative
// input is ignored with a warning, values above KMP_MAX_ACTIVE_LEVELS_LIMIT
// are clamped, and the validated value is stored in the current task's ICVs.
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed. The last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}

/* Gets max_active_levels */
// Returns the max_active_levels ICV from the calling thread's current task.
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

// The code below converts between kmp_sched_t and enum sched_type via int;
// verify the representation assumption at compile time.
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
// Implements omp_set_schedule(): validates the schedule kind (out-of-range
// input falls back to the default with a warning), maps it to the internal
// sched_type table, re-applies any schedule modifiers from the original kind,
// and stores the resulting kind and chunk in the current task's ICVs.
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t 
*thread;
  kmp_sched_t orig_kind;
  // kmp_team_t *team;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    //    __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -
    //    kmp_sched_lower - 2 ];
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}

/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread
= __kmp_threads[gtid]; 28080b57cec5SDimitry Andric 28090b57cec5SDimitry Andric th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 28100b57cec5SDimitry Andric switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) { 28110b57cec5SDimitry Andric case kmp_sch_static: 28120b57cec5SDimitry Andric case kmp_sch_static_greedy: 28130b57cec5SDimitry Andric case kmp_sch_static_balanced: 28140b57cec5SDimitry Andric *kind = kmp_sched_static; 28150b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kind, th_type); 28160b57cec5SDimitry Andric *chunk = 0; // chunk was not set, try to show this fact via zero value 28170b57cec5SDimitry Andric return; 28180b57cec5SDimitry Andric case kmp_sch_static_chunked: 28190b57cec5SDimitry Andric *kind = kmp_sched_static; 28200b57cec5SDimitry Andric break; 28210b57cec5SDimitry Andric case kmp_sch_dynamic_chunked: 28220b57cec5SDimitry Andric *kind = kmp_sched_dynamic; 28230b57cec5SDimitry Andric break; 28240b57cec5SDimitry Andric case kmp_sch_guided_chunked: 28250b57cec5SDimitry Andric case kmp_sch_guided_iterative_chunked: 28260b57cec5SDimitry Andric case kmp_sch_guided_analytical_chunked: 28270b57cec5SDimitry Andric *kind = kmp_sched_guided; 28280b57cec5SDimitry Andric break; 28290b57cec5SDimitry Andric case kmp_sch_auto: 28300b57cec5SDimitry Andric *kind = kmp_sched_auto; 28310b57cec5SDimitry Andric break; 28320b57cec5SDimitry Andric case kmp_sch_trapezoidal: 28330b57cec5SDimitry Andric *kind = kmp_sched_trapezoidal; 28340b57cec5SDimitry Andric break; 28350b57cec5SDimitry Andric #if KMP_STATIC_STEAL_ENABLED 28360b57cec5SDimitry Andric case kmp_sch_static_steal: 28370b57cec5SDimitry Andric *kind = kmp_sched_static_steal; 28380b57cec5SDimitry Andric break; 28390b57cec5SDimitry Andric #endif 28400b57cec5SDimitry Andric default: 28410b57cec5SDimitry Andric KMP_FATAL(UnknownSchedulingType, th_type); 28420b57cec5SDimitry Andric } 28430b57cec5SDimitry Andric 28440b57cec5SDimitry Andric __kmp_sched_apply_mods_stdkind(kind, th_type); 
28450b57cec5SDimitry Andric *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 28460b57cec5SDimitry Andric } 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric int __kmp_get_ancestor_thread_num(int gtid, int level) { 28490b57cec5SDimitry Andric 28500b57cec5SDimitry Andric int ii, dd; 28510b57cec5SDimitry Andric kmp_team_t *team; 28520b57cec5SDimitry Andric kmp_info_t *thr; 28530b57cec5SDimitry Andric 28540b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); 28550b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 28560b57cec5SDimitry Andric 28570b57cec5SDimitry Andric // validate level 28580b57cec5SDimitry Andric if (level == 0) 28590b57cec5SDimitry Andric return 0; 28600b57cec5SDimitry Andric if (level < 0) 28610b57cec5SDimitry Andric return -1; 28620b57cec5SDimitry Andric thr = __kmp_threads[gtid]; 28630b57cec5SDimitry Andric team = thr->th.th_team; 28640b57cec5SDimitry Andric ii = team->t.t_level; 28650b57cec5SDimitry Andric if (level > ii) 28660b57cec5SDimitry Andric return -1; 28670b57cec5SDimitry Andric 28680b57cec5SDimitry Andric if (thr->th.th_teams_microtask) { 28690b57cec5SDimitry Andric // AC: we are in teams region where multiple nested teams have same level 28700b57cec5SDimitry Andric int tlevel = thr->th.th_teams_level; // the level of the teams construct 28710b57cec5SDimitry Andric if (level <= 28720b57cec5SDimitry Andric tlevel) { // otherwise usual algorithm works (will not touch the teams) 28730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ii >= tlevel); 28740b57cec5SDimitry Andric // AC: As we need to pass by the teams league, we need to artificially 28750b57cec5SDimitry Andric // increase ii 28760b57cec5SDimitry Andric if (ii == tlevel) { 28770b57cec5SDimitry Andric ii += 2; // three teams have same level 28780b57cec5SDimitry Andric } else { 28790b57cec5SDimitry Andric ii++; // two teams have same level 28800b57cec5SDimitry Andric } 28810b57cec5SDimitry Andric } 28820b57cec5SDimitry 
Andric } 28830b57cec5SDimitry Andric 28840b57cec5SDimitry Andric if (ii == level) 28850b57cec5SDimitry Andric return __kmp_tid_from_gtid(gtid); 28860b57cec5SDimitry Andric 28870b57cec5SDimitry Andric dd = team->t.t_serialized; 28880b57cec5SDimitry Andric level++; 28890b57cec5SDimitry Andric while (ii > level) { 28900b57cec5SDimitry Andric for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 28910b57cec5SDimitry Andric } 28920b57cec5SDimitry Andric if ((team->t.t_serialized) && (!dd)) { 28930b57cec5SDimitry Andric team = team->t.t_parent; 28940b57cec5SDimitry Andric continue; 28950b57cec5SDimitry Andric } 28960b57cec5SDimitry Andric if (ii > level) { 28970b57cec5SDimitry Andric team = team->t.t_parent; 28980b57cec5SDimitry Andric dd = team->t.t_serialized; 28990b57cec5SDimitry Andric ii--; 29000b57cec5SDimitry Andric } 29010b57cec5SDimitry Andric } 29020b57cec5SDimitry Andric 29030b57cec5SDimitry Andric return (dd > 1) ? (0) : (team->t.t_master_tid); 29040b57cec5SDimitry Andric } 29050b57cec5SDimitry Andric 29060b57cec5SDimitry Andric int __kmp_get_team_size(int gtid, int level) { 29070b57cec5SDimitry Andric 29080b57cec5SDimitry Andric int ii, dd; 29090b57cec5SDimitry Andric kmp_team_t *team; 29100b57cec5SDimitry Andric kmp_info_t *thr; 29110b57cec5SDimitry Andric 29120b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); 29130b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_init_serial); 29140b57cec5SDimitry Andric 29150b57cec5SDimitry Andric // validate level 29160b57cec5SDimitry Andric if (level == 0) 29170b57cec5SDimitry Andric return 1; 29180b57cec5SDimitry Andric if (level < 0) 29190b57cec5SDimitry Andric return -1; 29200b57cec5SDimitry Andric thr = __kmp_threads[gtid]; 29210b57cec5SDimitry Andric team = thr->th.th_team; 29220b57cec5SDimitry Andric ii = team->t.t_level; 29230b57cec5SDimitry Andric if (level > ii) 29240b57cec5SDimitry Andric return -1; 29250b57cec5SDimitry Andric 29260b57cec5SDimitry Andric 
if (thr->th.th_teams_microtask) { 29270b57cec5SDimitry Andric // AC: we are in teams region where multiple nested teams have same level 29280b57cec5SDimitry Andric int tlevel = thr->th.th_teams_level; // the level of the teams construct 29290b57cec5SDimitry Andric if (level <= 29300b57cec5SDimitry Andric tlevel) { // otherwise usual algorithm works (will not touch the teams) 29310b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ii >= tlevel); 29320b57cec5SDimitry Andric // AC: As we need to pass by the teams league, we need to artificially 29330b57cec5SDimitry Andric // increase ii 29340b57cec5SDimitry Andric if (ii == tlevel) { 29350b57cec5SDimitry Andric ii += 2; // three teams have same level 29360b57cec5SDimitry Andric } else { 29370b57cec5SDimitry Andric ii++; // two teams have same level 29380b57cec5SDimitry Andric } 29390b57cec5SDimitry Andric } 29400b57cec5SDimitry Andric } 29410b57cec5SDimitry Andric 29420b57cec5SDimitry Andric while (ii > level) { 29430b57cec5SDimitry Andric for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 29440b57cec5SDimitry Andric } 29450b57cec5SDimitry Andric if (team->t.t_serialized && (!dd)) { 29460b57cec5SDimitry Andric team = team->t.t_parent; 29470b57cec5SDimitry Andric continue; 29480b57cec5SDimitry Andric } 29490b57cec5SDimitry Andric if (ii > level) { 29500b57cec5SDimitry Andric team = team->t.t_parent; 29510b57cec5SDimitry Andric ii--; 29520b57cec5SDimitry Andric } 29530b57cec5SDimitry Andric } 29540b57cec5SDimitry Andric 29550b57cec5SDimitry Andric return team->t.t_nproc; 29560b57cec5SDimitry Andric } 29570b57cec5SDimitry Andric 29580b57cec5SDimitry Andric kmp_r_sched_t __kmp_get_schedule_global() { 29590b57cec5SDimitry Andric // This routine created because pairs (__kmp_sched, __kmp_chunk) and 29600b57cec5SDimitry Andric // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults 29610b57cec5SDimitry Andric // independently. So one can get the updated schedule here. 
29620b57cec5SDimitry Andric 29630b57cec5SDimitry Andric kmp_r_sched_t r_sched; 29640b57cec5SDimitry Andric 29650b57cec5SDimitry Andric // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, 29660b57cec5SDimitry Andric // __kmp_guided. __kmp_sched should keep original value, so that user can set 29670b57cec5SDimitry Andric // KMP_SCHEDULE multiple times, and thus have different run-time schedules in 29680b57cec5SDimitry Andric // different roots (even in OMP 2.5) 29690b57cec5SDimitry Andric enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched); 29700b57cec5SDimitry Andric enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched); 29710b57cec5SDimitry Andric if (s == kmp_sch_static) { 29720b57cec5SDimitry Andric // replace STATIC with more detailed schedule (balanced or greedy) 29730b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_static; 29740b57cec5SDimitry Andric } else if (s == kmp_sch_guided_chunked) { 29750b57cec5SDimitry Andric // replace GUIDED with more detailed schedule (iterative or analytical) 29760b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_guided; 29770b57cec5SDimitry Andric } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 29780b57cec5SDimitry Andric r_sched.r_sched_type = __kmp_sched; 29790b57cec5SDimitry Andric } 29800b57cec5SDimitry Andric SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers); 29810b57cec5SDimitry Andric 29820b57cec5SDimitry Andric if (__kmp_chunk < KMP_DEFAULT_CHUNK) { 29830b57cec5SDimitry Andric // __kmp_chunk may be wrong here (if it was not ever set) 29840b57cec5SDimitry Andric r_sched.chunk = KMP_DEFAULT_CHUNK; 29850b57cec5SDimitry Andric } else { 29860b57cec5SDimitry Andric r_sched.chunk = __kmp_chunk; 29870b57cec5SDimitry Andric } 29880b57cec5SDimitry Andric 29890b57cec5SDimitry Andric return r_sched; 29900b57cec5SDimitry Andric } 29910b57cec5SDimitry Andric 29920b57cec5SDimitry Andric /* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) 
29930b57cec5SDimitry Andric at least argc number of *t_argv entries for the requested team. */ 29940b57cec5SDimitry Andric static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { 29950b57cec5SDimitry Andric 29960b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 29970b57cec5SDimitry Andric if (!realloc || argc > team->t.t_max_argc) { 29980b57cec5SDimitry Andric 29990b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " 30000b57cec5SDimitry Andric "current entries=%d\n", 30010b57cec5SDimitry Andric team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0)); 30020b57cec5SDimitry Andric /* if previously allocated heap space for args, free them */ 30030b57cec5SDimitry Andric if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) 30040b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 30050b57cec5SDimitry Andric 30060b57cec5SDimitry Andric if (argc <= KMP_INLINE_ARGV_ENTRIES) { 30070b57cec5SDimitry Andric /* use unused space in the cache line for arguments */ 30080b57cec5SDimitry Andric team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 30090b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " 30100b57cec5SDimitry Andric "argv entries\n", 30110b57cec5SDimitry Andric team->t.t_id, team->t.t_max_argc)); 30120b57cec5SDimitry Andric team->t.t_argv = &team->t.t_inline_argv[0]; 30130b57cec5SDimitry Andric if (__kmp_storage_map) { 30140b57cec5SDimitry Andric __kmp_print_storage_map_gtid( 30150b57cec5SDimitry Andric -1, &team->t.t_inline_argv[0], 30160b57cec5SDimitry Andric &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 30170b57cec5SDimitry Andric (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", 30180b57cec5SDimitry Andric team->t.t_id); 30190b57cec5SDimitry Andric } 30200b57cec5SDimitry Andric } else { 30210b57cec5SDimitry Andric /* allocate space for arguments in the heap */ 30220b57cec5SDimitry Andric team->t.t_max_argc = (argc <= 
(KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) 30230b57cec5SDimitry Andric ? KMP_MIN_MALLOC_ARGV_ENTRIES 30240b57cec5SDimitry Andric : 2 * argc; 30250b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d " 30260b57cec5SDimitry Andric "argv entries\n", 30270b57cec5SDimitry Andric team->t.t_id, team->t.t_max_argc)); 30280b57cec5SDimitry Andric team->t.t_argv = 30290b57cec5SDimitry Andric (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); 30300b57cec5SDimitry Andric if (__kmp_storage_map) { 30310b57cec5SDimitry Andric __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0], 30320b57cec5SDimitry Andric &team->t.t_argv[team->t.t_max_argc], 30330b57cec5SDimitry Andric sizeof(void *) * team->t.t_max_argc, 30340b57cec5SDimitry Andric "team_%d.t_argv", team->t.t_id); 30350b57cec5SDimitry Andric } 30360b57cec5SDimitry Andric } 30370b57cec5SDimitry Andric } 30380b57cec5SDimitry Andric } 30390b57cec5SDimitry Andric 30400b57cec5SDimitry Andric static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) { 30410b57cec5SDimitry Andric int i; 30420b57cec5SDimitry Andric int num_disp_buff = max_nth > 1 ? 
__kmp_dispatch_num_buffers : 2; 30430b57cec5SDimitry Andric team->t.t_threads = 30440b57cec5SDimitry Andric (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth); 30450b57cec5SDimitry Andric team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate( 30460b57cec5SDimitry Andric sizeof(dispatch_shared_info_t) * num_disp_buff); 30470b57cec5SDimitry Andric team->t.t_dispatch = 30480b57cec5SDimitry Andric (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth); 30490b57cec5SDimitry Andric team->t.t_implicit_task_taskdata = 30500b57cec5SDimitry Andric (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth); 30510b57cec5SDimitry Andric team->t.t_max_nproc = max_nth; 30520b57cec5SDimitry Andric 30530b57cec5SDimitry Andric /* setup dispatch buffers */ 30540b57cec5SDimitry Andric for (i = 0; i < num_disp_buff; ++i) { 30550b57cec5SDimitry Andric team->t.t_disp_buffer[i].buffer_index = i; 30560b57cec5SDimitry Andric team->t.t_disp_buffer[i].doacross_buf_idx = i; 30570b57cec5SDimitry Andric } 30580b57cec5SDimitry Andric } 30590b57cec5SDimitry Andric 30600b57cec5SDimitry Andric static void __kmp_free_team_arrays(kmp_team_t *team) { 30610b57cec5SDimitry Andric /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ 30620b57cec5SDimitry Andric int i; 30630b57cec5SDimitry Andric for (i = 0; i < team->t.t_max_nproc; ++i) { 30640b57cec5SDimitry Andric if (team->t.t_dispatch[i].th_disp_buffer != NULL) { 30650b57cec5SDimitry Andric __kmp_free(team->t.t_dispatch[i].th_disp_buffer); 30660b57cec5SDimitry Andric team->t.t_dispatch[i].th_disp_buffer = NULL; 30670b57cec5SDimitry Andric } 30680b57cec5SDimitry Andric } 30690b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 30700b57cec5SDimitry Andric __kmp_dispatch_free_hierarchies(team); 30710b57cec5SDimitry Andric #endif 30720b57cec5SDimitry Andric __kmp_free(team->t.t_threads); 30730b57cec5SDimitry Andric __kmp_free(team->t.t_disp_buffer); 30740b57cec5SDimitry Andric 
__kmp_free(team->t.t_dispatch); 30750b57cec5SDimitry Andric __kmp_free(team->t.t_implicit_task_taskdata); 30760b57cec5SDimitry Andric team->t.t_threads = NULL; 30770b57cec5SDimitry Andric team->t.t_disp_buffer = NULL; 30780b57cec5SDimitry Andric team->t.t_dispatch = NULL; 30790b57cec5SDimitry Andric team->t.t_implicit_task_taskdata = 0; 30800b57cec5SDimitry Andric } 30810b57cec5SDimitry Andric 30820b57cec5SDimitry Andric static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { 30830b57cec5SDimitry Andric kmp_info_t **oldThreads = team->t.t_threads; 30840b57cec5SDimitry Andric 30850b57cec5SDimitry Andric __kmp_free(team->t.t_disp_buffer); 30860b57cec5SDimitry Andric __kmp_free(team->t.t_dispatch); 30870b57cec5SDimitry Andric __kmp_free(team->t.t_implicit_task_taskdata); 30880b57cec5SDimitry Andric __kmp_allocate_team_arrays(team, max_nth); 30890b57cec5SDimitry Andric 30900b57cec5SDimitry Andric KMP_MEMCPY(team->t.t_threads, oldThreads, 30910b57cec5SDimitry Andric team->t.t_nproc * sizeof(kmp_info_t *)); 30920b57cec5SDimitry Andric 30930b57cec5SDimitry Andric __kmp_free(oldThreads); 30940b57cec5SDimitry Andric } 30950b57cec5SDimitry Andric 30960b57cec5SDimitry Andric static kmp_internal_control_t __kmp_get_global_icvs(void) { 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric kmp_r_sched_t r_sched = 30990b57cec5SDimitry Andric __kmp_get_schedule_global(); // get current state of scheduling globals 31000b57cec5SDimitry Andric 31010b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0); 31020b57cec5SDimitry Andric 31030b57cec5SDimitry Andric kmp_internal_control_t g_icvs = { 31040b57cec5SDimitry Andric 0, // int serial_nesting_level; //corresponds to value of th_team_serialized 31050b57cec5SDimitry Andric (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic 31060b57cec5SDimitry Andric // adjustment of threads (per thread) 31070b57cec5SDimitry Andric (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control 
for 31080b57cec5SDimitry Andric // whether blocktime is explicitly set 31090b57cec5SDimitry Andric __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime 31100b57cec5SDimitry Andric #if KMP_USE_MONITOR 31110b57cec5SDimitry Andric __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime 31120b57cec5SDimitry Andric // intervals 31130b57cec5SDimitry Andric #endif 31140b57cec5SDimitry Andric __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for 31150b57cec5SDimitry Andric // next parallel region (per thread) 31160b57cec5SDimitry Andric // (use a max ub on value if __kmp_parallel_initialize not called yet) 31170b57cec5SDimitry Andric __kmp_cg_max_nth, // int thread_limit; 31180b57cec5SDimitry Andric __kmp_dflt_max_active_levels, // int max_active_levels; //internal control 31190b57cec5SDimitry Andric // for max_active_levels 31200b57cec5SDimitry Andric r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule 31210b57cec5SDimitry Andric // {sched,chunk} pair 31220b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[0], 31230b57cec5SDimitry Andric __kmp_default_device, 31240b57cec5SDimitry Andric NULL // struct kmp_internal_control *next; 31250b57cec5SDimitry Andric }; 31260b57cec5SDimitry Andric 31270b57cec5SDimitry Andric return g_icvs; 31280b57cec5SDimitry Andric } 31290b57cec5SDimitry Andric 31300b57cec5SDimitry Andric static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) { 31310b57cec5SDimitry Andric 31320b57cec5SDimitry Andric kmp_internal_control_t gx_icvs; 31330b57cec5SDimitry Andric gx_icvs.serial_nesting_level = 31340b57cec5SDimitry Andric 0; // probably =team->t.t_serial like in save_inter_controls 31350b57cec5SDimitry Andric copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs); 31360b57cec5SDimitry Andric gx_icvs.next = NULL; 31370b57cec5SDimitry Andric 31380b57cec5SDimitry Andric return gx_icvs; 31390b57cec5SDimitry Andric } 
31400b57cec5SDimitry Andric 31410b57cec5SDimitry Andric static void __kmp_initialize_root(kmp_root_t *root) { 31420b57cec5SDimitry Andric int f; 31430b57cec5SDimitry Andric kmp_team_t *root_team; 31440b57cec5SDimitry Andric kmp_team_t *hot_team; 31450b57cec5SDimitry Andric int hot_team_max_nth; 31460b57cec5SDimitry Andric kmp_r_sched_t r_sched = 31470b57cec5SDimitry Andric __kmp_get_schedule_global(); // get current state of scheduling globals 31480b57cec5SDimitry Andric kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 31490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root); 31500b57cec5SDimitry Andric KMP_ASSERT(!root->r.r_begin); 31510b57cec5SDimitry Andric 31520b57cec5SDimitry Andric /* setup the root state structure */ 31530b57cec5SDimitry Andric __kmp_init_lock(&root->r.r_begin_lock); 31540b57cec5SDimitry Andric root->r.r_begin = FALSE; 31550b57cec5SDimitry Andric root->r.r_active = FALSE; 31560b57cec5SDimitry Andric root->r.r_in_parallel = 0; 31570b57cec5SDimitry Andric root->r.r_blocktime = __kmp_dflt_blocktime; 31580b57cec5SDimitry Andric 31590b57cec5SDimitry Andric /* setup the root team for this task */ 31600b57cec5SDimitry Andric /* allocate the root team structure */ 31610b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); 31620b57cec5SDimitry Andric 31630b57cec5SDimitry Andric root_team = 31640b57cec5SDimitry Andric __kmp_allocate_team(root, 31650b57cec5SDimitry Andric 1, // new_nproc 31660b57cec5SDimitry Andric 1, // max_nproc 31670b57cec5SDimitry Andric #if OMPT_SUPPORT 31680b57cec5SDimitry Andric ompt_data_none, // root parallel id 31690b57cec5SDimitry Andric #endif 31700b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[0], &r_icvs, 31710b57cec5SDimitry Andric 0 // argc 31720b57cec5SDimitry Andric USE_NESTED_HOT_ARG(NULL) // master thread is unknown 31730b57cec5SDimitry Andric ); 31740b57cec5SDimitry Andric #if USE_DEBUGGER 31750b57cec5SDimitry Andric // Non-NULL value should be assigned to make the debugger 
display the root 31760b57cec5SDimitry Andric // team. 31770b57cec5SDimitry Andric TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0)); 31780b57cec5SDimitry Andric #endif 31790b57cec5SDimitry Andric 31800b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team)); 31810b57cec5SDimitry Andric 31820b57cec5SDimitry Andric root->r.r_root_team = root_team; 31830b57cec5SDimitry Andric root_team->t.t_control_stack_top = NULL; 31840b57cec5SDimitry Andric 31850b57cec5SDimitry Andric /* initialize root team */ 31860b57cec5SDimitry Andric root_team->t.t_threads[0] = NULL; 31870b57cec5SDimitry Andric root_team->t.t_nproc = 1; 31880b57cec5SDimitry Andric root_team->t.t_serialized = 1; 31890b57cec5SDimitry Andric // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 31900b57cec5SDimitry Andric root_team->t.t_sched.sched = r_sched.sched; 31910b57cec5SDimitry Andric KA_TRACE( 31920b57cec5SDimitry Andric 20, 31930b57cec5SDimitry Andric ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", 31940b57cec5SDimitry Andric root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 31950b57cec5SDimitry Andric 31960b57cec5SDimitry Andric /* setup the hot team for this task */ 31970b57cec5SDimitry Andric /* allocate the hot team structure */ 31980b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); 31990b57cec5SDimitry Andric 32000b57cec5SDimitry Andric hot_team = 32010b57cec5SDimitry Andric __kmp_allocate_team(root, 32020b57cec5SDimitry Andric 1, // new_nproc 32030b57cec5SDimitry Andric __kmp_dflt_team_nth_ub * 2, // max_nproc 32040b57cec5SDimitry Andric #if OMPT_SUPPORT 32050b57cec5SDimitry Andric ompt_data_none, // root parallel id 32060b57cec5SDimitry Andric #endif 32070b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types[0], &r_icvs, 32080b57cec5SDimitry Andric 0 // argc 32090b57cec5SDimitry Andric USE_NESTED_HOT_ARG(NULL) // master thread is unknown 
32100b57cec5SDimitry Andric ); 32110b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); 32120b57cec5SDimitry Andric 32130b57cec5SDimitry Andric root->r.r_hot_team = hot_team; 32140b57cec5SDimitry Andric root_team->t.t_control_stack_top = NULL; 32150b57cec5SDimitry Andric 32160b57cec5SDimitry Andric /* first-time initialization */ 32170b57cec5SDimitry Andric hot_team->t.t_parent = root_team; 32180b57cec5SDimitry Andric 32190b57cec5SDimitry Andric /* initialize hot team */ 32200b57cec5SDimitry Andric hot_team_max_nth = hot_team->t.t_max_nproc; 32210b57cec5SDimitry Andric for (f = 0; f < hot_team_max_nth; ++f) { 32220b57cec5SDimitry Andric hot_team->t.t_threads[f] = NULL; 32230b57cec5SDimitry Andric } 32240b57cec5SDimitry Andric hot_team->t.t_nproc = 1; 32250b57cec5SDimitry Andric // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 32260b57cec5SDimitry Andric hot_team->t.t_sched.sched = r_sched.sched; 32270b57cec5SDimitry Andric hot_team->t.t_size_changed = 0; 32280b57cec5SDimitry Andric } 32290b57cec5SDimitry Andric 32300b57cec5SDimitry Andric #ifdef KMP_DEBUG 32310b57cec5SDimitry Andric 32320b57cec5SDimitry Andric typedef struct kmp_team_list_item { 32330b57cec5SDimitry Andric kmp_team_p const *entry; 32340b57cec5SDimitry Andric struct kmp_team_list_item *next; 32350b57cec5SDimitry Andric } kmp_team_list_item_t; 32360b57cec5SDimitry Andric typedef kmp_team_list_item_t *kmp_team_list_t; 32370b57cec5SDimitry Andric 32380b57cec5SDimitry Andric static void __kmp_print_structure_team_accum( // Add team to list of teams. 32390b57cec5SDimitry Andric kmp_team_list_t list, // List of teams. 32400b57cec5SDimitry Andric kmp_team_p const *team // Team to add. 32410b57cec5SDimitry Andric ) { 32420b57cec5SDimitry Andric 32430b57cec5SDimitry Andric // List must terminate with item where both entry and next are NULL. 32440b57cec5SDimitry Andric // Team is added to the list only once. 
32450b57cec5SDimitry Andric // List is sorted in ascending order by team id. 32460b57cec5SDimitry Andric // Team id is *not* a key. 32470b57cec5SDimitry Andric 32480b57cec5SDimitry Andric kmp_team_list_t l; 32490b57cec5SDimitry Andric 32500b57cec5SDimitry Andric KMP_DEBUG_ASSERT(list != NULL); 32510b57cec5SDimitry Andric if (team == NULL) { 32520b57cec5SDimitry Andric return; 32530b57cec5SDimitry Andric } 32540b57cec5SDimitry Andric 32550b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_parent); 32560b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, team->t.t_next_pool); 32570b57cec5SDimitry Andric 32580b57cec5SDimitry Andric // Search list for the team. 32590b57cec5SDimitry Andric l = list; 32600b57cec5SDimitry Andric while (l->next != NULL && l->entry != team) { 32610b57cec5SDimitry Andric l = l->next; 32620b57cec5SDimitry Andric } 32630b57cec5SDimitry Andric if (l->next != NULL) { 32640b57cec5SDimitry Andric return; // Team has been added before, exit. 32650b57cec5SDimitry Andric } 32660b57cec5SDimitry Andric 32670b57cec5SDimitry Andric // Team is not found. Search list again for insertion point. 32680b57cec5SDimitry Andric l = list; 32690b57cec5SDimitry Andric while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 32700b57cec5SDimitry Andric l = l->next; 32710b57cec5SDimitry Andric } 32720b57cec5SDimitry Andric 32730b57cec5SDimitry Andric // Insert team. 
32740b57cec5SDimitry Andric { 32750b57cec5SDimitry Andric kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 32760b57cec5SDimitry Andric sizeof(kmp_team_list_item_t)); 32770b57cec5SDimitry Andric *item = *l; 32780b57cec5SDimitry Andric l->entry = team; 32790b57cec5SDimitry Andric l->next = item; 32800b57cec5SDimitry Andric } 32810b57cec5SDimitry Andric } 32820b57cec5SDimitry Andric 32830b57cec5SDimitry Andric static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 32840b57cec5SDimitry Andric 32850b57cec5SDimitry Andric ) { 32860b57cec5SDimitry Andric __kmp_printf("%s", title); 32870b57cec5SDimitry Andric if (team != NULL) { 32880b57cec5SDimitry Andric __kmp_printf("%2x %p\n", team->t.t_id, team); 32890b57cec5SDimitry Andric } else { 32900b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 32910b57cec5SDimitry Andric } 32920b57cec5SDimitry Andric } 32930b57cec5SDimitry Andric 32940b57cec5SDimitry Andric static void __kmp_print_structure_thread(char const *title, 32950b57cec5SDimitry Andric kmp_info_p const *thread) { 32960b57cec5SDimitry Andric __kmp_printf("%s", title); 32970b57cec5SDimitry Andric if (thread != NULL) { 32980b57cec5SDimitry Andric __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 32990b57cec5SDimitry Andric } else { 33000b57cec5SDimitry Andric __kmp_printf(" - (nil)\n"); 33010b57cec5SDimitry Andric } 33020b57cec5SDimitry Andric } 33030b57cec5SDimitry Andric 33040b57cec5SDimitry Andric void __kmp_print_structure(void) { 33050b57cec5SDimitry Andric 33060b57cec5SDimitry Andric kmp_team_list_t list; 33070b57cec5SDimitry Andric 33080b57cec5SDimitry Andric // Initialize list of teams. 
33090b57cec5SDimitry Andric list = 33100b57cec5SDimitry Andric (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 33110b57cec5SDimitry Andric list->entry = NULL; 33120b57cec5SDimitry Andric list->next = NULL; 33130b57cec5SDimitry Andric 33140b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nGlobal Thread " 33150b57cec5SDimitry Andric "Table\n------------------------------\n"); 33160b57cec5SDimitry Andric { 33170b57cec5SDimitry Andric int gtid; 33180b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 33190b57cec5SDimitry Andric __kmp_printf("%2d", gtid); 33200b57cec5SDimitry Andric if (__kmp_threads != NULL) { 33210b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_threads[gtid]); 33220b57cec5SDimitry Andric } 33230b57cec5SDimitry Andric if (__kmp_root != NULL) { 33240b57cec5SDimitry Andric __kmp_printf(" %p", __kmp_root[gtid]); 33250b57cec5SDimitry Andric } 33260b57cec5SDimitry Andric __kmp_printf("\n"); 33270b57cec5SDimitry Andric } 33280b57cec5SDimitry Andric } 33290b57cec5SDimitry Andric 33300b57cec5SDimitry Andric // Print out __kmp_threads array. 
33310b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nThreads\n--------------------" 33320b57cec5SDimitry Andric "----------\n"); 33330b57cec5SDimitry Andric if (__kmp_threads != NULL) { 33340b57cec5SDimitry Andric int gtid; 33350b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 33360b57cec5SDimitry Andric kmp_info_t const *thread = __kmp_threads[gtid]; 33370b57cec5SDimitry Andric if (thread != NULL) { 33380b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, thread); 33390b57cec5SDimitry Andric __kmp_printf(" Our Root: %p\n", thread->th.th_root); 33400b57cec5SDimitry Andric __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 33410b57cec5SDimitry Andric __kmp_print_structure_team(" Serial Team: ", 33420b57cec5SDimitry Andric thread->th.th_serial_team); 33430b57cec5SDimitry Andric __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 33440b57cec5SDimitry Andric __kmp_print_structure_thread(" Master: ", 33450b57cec5SDimitry Andric thread->th.th_team_master); 33460b57cec5SDimitry Andric __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 33470b57cec5SDimitry Andric __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 33480b57cec5SDimitry Andric __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 33490b57cec5SDimitry Andric __kmp_print_structure_thread(" Next in pool: ", 33500b57cec5SDimitry Andric thread->th.th_next_pool); 33510b57cec5SDimitry Andric __kmp_printf("\n"); 33520b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_team); 33530b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 33540b57cec5SDimitry Andric } 33550b57cec5SDimitry Andric } 33560b57cec5SDimitry Andric } else { 33570b57cec5SDimitry Andric __kmp_printf("Threads array is not allocated.\n"); 33580b57cec5SDimitry Andric } 33590b57cec5SDimitry Andric 33600b57cec5SDimitry Andric // Print out __kmp_root array. 
33610b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nUbers\n----------------------" 33620b57cec5SDimitry Andric "--------\n"); 33630b57cec5SDimitry Andric if (__kmp_root != NULL) { 33640b57cec5SDimitry Andric int gtid; 33650b57cec5SDimitry Andric for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 33660b57cec5SDimitry Andric kmp_root_t const *root = __kmp_root[gtid]; 33670b57cec5SDimitry Andric if (root != NULL) { 33680b57cec5SDimitry Andric __kmp_printf("GTID %2d %p:\n", gtid, root); 33690b57cec5SDimitry Andric __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 33700b57cec5SDimitry Andric __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 33710b57cec5SDimitry Andric __kmp_print_structure_thread(" Uber Thread: ", 33720b57cec5SDimitry Andric root->r.r_uber_thread); 33730b57cec5SDimitry Andric __kmp_printf(" Active?: %2d\n", root->r.r_active); 33740b57cec5SDimitry Andric __kmp_printf(" In Parallel: %2d\n", 33750b57cec5SDimitry Andric KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); 33760b57cec5SDimitry Andric __kmp_printf("\n"); 33770b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_root_team); 33780b57cec5SDimitry Andric __kmp_print_structure_team_accum(list, root->r.r_hot_team); 33790b57cec5SDimitry Andric } 33800b57cec5SDimitry Andric } 33810b57cec5SDimitry Andric } else { 33820b57cec5SDimitry Andric __kmp_printf("Ubers array is not allocated.\n"); 33830b57cec5SDimitry Andric } 33840b57cec5SDimitry Andric 33850b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nTeams\n----------------------" 33860b57cec5SDimitry Andric "--------\n"); 33870b57cec5SDimitry Andric while (list->next != NULL) { 33880b57cec5SDimitry Andric kmp_team_p const *team = list->entry; 33890b57cec5SDimitry Andric int i; 33900b57cec5SDimitry Andric __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 33910b57cec5SDimitry Andric __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 
33920b57cec5SDimitry Andric __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid); 33930b57cec5SDimitry Andric __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 33940b57cec5SDimitry Andric __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 33950b57cec5SDimitry Andric __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 33960b57cec5SDimitry Andric for (i = 0; i < team->t.t_nproc; ++i) { 33970b57cec5SDimitry Andric __kmp_printf(" Thread %2d: ", i); 33980b57cec5SDimitry Andric __kmp_print_structure_thread("", team->t.t_threads[i]); 33990b57cec5SDimitry Andric } 34000b57cec5SDimitry Andric __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 34010b57cec5SDimitry Andric __kmp_printf("\n"); 34020b57cec5SDimitry Andric list = list->next; 34030b57cec5SDimitry Andric } 34040b57cec5SDimitry Andric 34050b57cec5SDimitry Andric // Print out __kmp_thread_pool and __kmp_team_pool. 34060b57cec5SDimitry Andric __kmp_printf("\n------------------------------\nPools\n----------------------" 34070b57cec5SDimitry Andric "--------\n"); 34080b57cec5SDimitry Andric __kmp_print_structure_thread("Thread pool: ", 34090b57cec5SDimitry Andric CCAST(kmp_info_t *, __kmp_thread_pool)); 34100b57cec5SDimitry Andric __kmp_print_structure_team("Team pool: ", 34110b57cec5SDimitry Andric CCAST(kmp_team_t *, __kmp_team_pool)); 34120b57cec5SDimitry Andric __kmp_printf("\n"); 34130b57cec5SDimitry Andric 34140b57cec5SDimitry Andric // Free team list. 
34150b57cec5SDimitry Andric while (list != NULL) { 34160b57cec5SDimitry Andric kmp_team_list_item_t *item = list; 34170b57cec5SDimitry Andric list = list->next; 34180b57cec5SDimitry Andric KMP_INTERNAL_FREE(item); 34190b57cec5SDimitry Andric } 34200b57cec5SDimitry Andric } 34210b57cec5SDimitry Andric 34220b57cec5SDimitry Andric #endif 34230b57cec5SDimitry Andric 34240b57cec5SDimitry Andric //--------------------------------------------------------------------------- 34250b57cec5SDimitry Andric // Stuff for per-thread fast random number generator 34260b57cec5SDimitry Andric // Table of primes 34270b57cec5SDimitry Andric static const unsigned __kmp_primes[] = { 34280b57cec5SDimitry Andric 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 34290b57cec5SDimitry Andric 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 34300b57cec5SDimitry Andric 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 34310b57cec5SDimitry Andric 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 34320b57cec5SDimitry Andric 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 34330b57cec5SDimitry Andric 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 34340b57cec5SDimitry Andric 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 34350b57cec5SDimitry Andric 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 34360b57cec5SDimitry Andric 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 34370b57cec5SDimitry Andric 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 34380b57cec5SDimitry Andric 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 34390b57cec5SDimitry Andric 34400b57cec5SDimitry Andric //--------------------------------------------------------------------------- 34410b57cec5SDimitry Andric // __kmp_get_random: Get a random number using a linear congruential method. 
// Return a pseudo-random number in [0, 65535] for this thread and advance
// the thread's private LCG state (th_x' = th_a * th_x + 1, mod 2^32 via
// unsigned wraparound). The returned value is the high 16 bits of the state.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16; // high half of the state is the output

  thread->th.th_x = x * thread->th.th_a + 1; // advance LCG state

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
// Seeds the per-thread LCG from the thread's tid: the multiplier th_a is
// picked from the __kmp_primes table (indexed by seed mod table size) so
// different threads get decorrelated streams; th_x is one LCG step past
// (seed + 1).
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}

#if KMP_OS_WINDOWS
/* reclaim array entries for root threads that are already dead, returns number
 * reclaimed */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and
   __kmp_root, with appropriate update to __kmp_threads_capacity. Array
   capacity is increased by doubling with clipping to __kmp_tp_capacity, if
   threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nNeed free slots, the function does
   that expansion. If not, nothing is done beyond the possible initial root
   thread reclamation.

   If any argument is negative, the behavior is undefined. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  // Reclaimed slots count toward the request; never let nNeed go negative.
  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
  // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
  // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
  // > __kmp_max_nth in one of two ways:
  //
  // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
  //    may not be reused by another thread, so we may need to increase
  //    __kmp_threads_capacity to __kmp_max_nth + 1.
  //
  // 2) New foreign root(s) are encountered. We always register new foreign
  //    roots. This may cause a smaller # of threads to be allocated at
  //    subsequent parallel regions, but the worker threads hang around (and
  //    eventually go to sleep) and need slots in the __kmp_threads[] array.
  //
  // Anyway, that is the reason for moving the check to see if
  // __kmp_max_nth was exceeded into __kmp_reserve_threads()
  // instead of having it performed here. -BB

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  // Double capacity until the request is satisfied, clipping at the
  // system maximum (the headroom check above guarantees this terminates
  // with newCapacity >= minimumRequiredCapacity).
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  // __kmp_threads and __kmp_root live in a single allocation: threads array
  // first, root array immediately after, plus cache-line padding.
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  // Publish the new arrays via volatile-qualified stores, then free the old
  // combined allocation; capacity is published last so readers never see a
  // capacity larger than the array they loaded.
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  // Keep the threadprivate cache capacity in sync (synchronized with
  // __kmpc_threadprivate_cached via __kmp_tp_cached_lock; re-check under
  // the lock).
  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}

/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if are the
   thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* 2007-03-02:
     If initial thread did not invoke OpenMP RTL yet, and this thread is not an
     initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not
     work as expected -- it may return false (that means there is at least one
     empty slot in __kmp_threads array), but it is possible the only free slot
     is #0, which is reserved for initial thread and so cannot be used for this
     one. Following code workarounds this bug.

     However, right solution seems to be not reserving slot #0 for initial
     thread because:
     (1) there is no magic in slot #0,
     (2) we cannot detect initial thread reliably (the first thread which does
         serial initialization may be not a real initial thread). */
  capacity = __kmp_threads_capacity;
  // Slot #0 is reserved for the initial thread; if it is still empty, a
  // foreign root cannot use it, so discount it from the usable capacity.
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
    // Expansion failed: fatal error, with a threadprivate-specific hint when
    // the cap came from the threadprivate cache size.
    if (__kmp_tp_cached) {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread),
                  KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                  KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
                  __kmp_msg_null);
    }
  }

  /* find an available thread slot */
  /* Don't reassign the zero slot since we need that to only be used by initial
     thread */
  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
       gtid++)
    ;
  KA_TRACE(1,
           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
  KMP_ASSERT(gtid < __kmp_threads_capacity);

  /* update global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  // Reuse an existing root descriptor for this slot if one survives from a
  // previous registration; otherwise allocate a fresh (zeroed) one.
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(SERIAL_REGION);
  KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
#endif
  __kmp_initialize_root(root);

  /* setup new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread; // reuse surviving uber thread
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
#if OMPT_SUPPORT
    root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
#endif
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
#if USE_FAST_MEMORY
    __kmp_initialize_fast_memory(root_thread);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);
#endif
    __kmp_init_random(root_thread); // Initialize random number generator
  }

  /* setup the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team = __kmp_allocate_team(
        root, 1, 1,
#if OMPT_SUPPORT
        ompt_data_none, // root parallel id
#endif
        proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // AC: the team created in reserve, not for execution (it is unused for now).
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread, get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the master thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
#endif
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
  if (TCR_4(__kmp_init_middle)) {
    __kmp_affinity_set_init_mask(gtid, TRUE);
  }
#endif /* KMP_AFFINITY_SUPPORTED */
  root_thread->th.th_def_allocator = __kmp_def_allocator;
  root_thread->th.th_prev_level = 0;
  root_thread->th.th_prev_num_threads = 1;

  // A root thread starts its own contention group (thread-limit scope).
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = root_thread;
  tmp->cg_thread_limit = __kmp_cg_max_nth;
  tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  tmp->up = NULL;
  root_thread->th.th_cg_roots = tmp;

  __kmp_root_counter++;

#if OMPT_SUPPORT
  if (!initial_thread && ompt_enabled.enabled) {

    // NOTE(review): this inner root_thread deliberately shadows the outer
    // local for the duration of the OMPT callbacks.
    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }
#endif

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}

#if KMP_NESTED_HOT_TEAMS
// Recursively free the nested hot teams hanging off thread `thr` at nesting
// depth `level` (and below), returning the number of __kmp_threads entries
// freed. The master of each hot team is not counted (it is not freed here).
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0; // nothing recorded at this level
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    // Recurse into each worker's deeper hot teams before freeing this team;
    // the master's (i == 0) hot_teams array is owned by the caller chain.
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif

// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  // Count the hot team's workers; n is the number of __kmp_threads entries
  // this reset will free (master slot is accounted via __kmp_reap_thread).
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        // Recursively free this worker's nested hot teams (levels 1..max).
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
  // Report the end of the root's initial implicit task and the thread end
  // to any registered OMPT tool before the uber thread is reaped.
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  // Drop the uber thread's reference on its contention-group root; the
  // pre-decrement value tells us if we were the last member.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put root thread to __kmp_thread_pool, so we have to reap it
  // instead of freeing.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}

// Unregisters the calling thread as an OpenMP root thread: waits for any
// outstanding proxy tasks, resets the root (freeing its root/hot teams and
// reaping the uber thread), and clears the gtid from thread-local storage.
// Takes and releases __kmp_forkjoin_lock; returns early if the runtime has
// already shut down.
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held
   Unregisters a root thread that is not the current thread.  Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif

#if KMP_DEBUG
// Debug-only helper: prints the calling thread's identifiers (gtid, tid),
// its thread/team/serial-team pointers, its current task, and the parent of
// its implicit task.
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG

/* TODO optimize with one big memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible */
// Binds thread this_thr into team as thread number tid (global id gtid):
// caches team info in the thread, sets up its implicit task, private-common
// table, contention-group membership, dynamic dispatch buffers, and task-state
// memo stack.
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in
     kmp_allocate_thread/create_worker.
     this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  // Publish the team pointer with release semantics before filling in the
  // rest of the thread's cached team data.
  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;
  TCW_PTR(this_thr->th.th_sleep_loc, NULL);

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
  // TODO: Initialize ICVs from parent; GEH - isn't that already done in
  // __kmp_initialize_team()?

  /* TODO no worksharing in speculative threads */
  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  // Lazily allocate the thread's private-common table (threadprivate support).
  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // Master's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make new thread's CG root same as master's
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // worker changes CG, need to check if old CG should be freed
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment new thread's CG root's counter to add the new thread
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    // A serialized team (max_nproc == 1) needs only one dispatch buffer.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size, "th_%d.th_dispatch.th_disp_buffer "
                       "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      // Buffer already allocated (thread reuse) -- just clear it.
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  // Lazily allocate and zero the task-state memo stack (initial depth 4).
  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}

/* allocate a new thread for the requesting team.
   this is only called from
   within a forkjoin critical section. we will first try to get an available
   thread from the thread pool. if none is available, we will fork a new one
   assuming we are able to create a new one. this should be assured, as the
   caller should check on this first. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool */
  if (__kmp_thread_pool) {
    // Pop the head of the pool and repair the insert point if we took it.
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    // Reset task-state bookkeeping for the recycled thread.
    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(__kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread.  We try to do this as early as possible.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // The reason is that if the library is loaded/unloaded in a loop with
      // small (parallel) work in between, then there is high probability that
      // monitor thread started after the library shutdown. At shutdown it is
      // too late to cope with the problem, because when the master is in
      // DllMain (process detach) the monitor has no chances to start (it is
      // blocked), and master has no means to inform the monitor that the
      // library has gone, because all the memory which the monitor can access
      // is going to be released/reset.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();
  // Find the first free gtid slot (0 is reserved for the initial thread).
  for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's master thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
  // numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
   The hot team code calls this case at every fork barrier, so EPCC barrier
   tests are extremely sensitive to changes in it, esp. writes to the team
   struct, which cause a cache invalidation in all threads.
   IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  // Once parallel init has completed, the incoming ICVs must carry a nonzero
  // nproc; before that point the check is skipped.
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  // KMP_CHECK_UPDATE only writes when the value actually changes, which keeps
  // this routine from dirtying team-struct cache lines unnecessarily (see the
  // EPCC warning above).
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  // A team of one thread is, by definition, serialized.
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;
  /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess
   * up hot team */

  // Synchronized write: t_pkfn is read by worker threads via TCR.
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  // Seed the team's run-time schedule from the incoming ICVs.
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  // Delegate the per-fork (hot-team) portion of the setup: ident, team id,
  // and master's implicit-task ICVs.
  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Sets full mask for thread and returns old mask, no changes to structures.
*/ 44980b57cec5SDimitry Andric static void 44990b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 45000b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 45010b57cec5SDimitry Andric int status; 45020b57cec5SDimitry Andric if (old_mask != NULL) { 45030b57cec5SDimitry Andric status = __kmp_get_system_affinity(old_mask, TRUE); 45040b57cec5SDimitry Andric int error = errno; 45050b57cec5SDimitry Andric if (status != 0) { 45060b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), 45070b57cec5SDimitry Andric __kmp_msg_null); 45080b57cec5SDimitry Andric } 45090b57cec5SDimitry Andric } 45100b57cec5SDimitry Andric __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 45110b57cec5SDimitry Andric } 45120b57cec5SDimitry Andric } 45130b57cec5SDimitry Andric #endif 45140b57cec5SDimitry Andric 45150b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 45160b57cec5SDimitry Andric 45170b57cec5SDimitry Andric // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 45180b57cec5SDimitry Andric // It calculats the worker + master thread's partition based upon the parent 45190b57cec5SDimitry Andric // thread's partition, and binds each worker to a thread in their partition. 45200b57cec5SDimitry Andric // The master thread's partition should already include its current binding. 
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Assign each team member's place partition (th_first_place/th_last_place)
  // and target place (th_new_place) from the master thread's partition,
  // according to the team's proc_bind policy. update_master_only == 1 limits
  // the walk to the master thread (honored in the proc_bind_spread case,
  // where it sets thidx = 1).
  // Copy the master thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // serial teams might have the proc_bind policy set to proc_bind_default.
    // It doesn't matter, as we don't rebind master thread for any proc_bind
    // policy
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_master: {
    // Every worker inherits the master's partition and is targeted at the
    // master's own place.
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;
      // Record that at least one thread's place will change, so affinity can
      // be (re)displayed.
      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
          team->t.t_display_affinity != 1) {
        team->t.t_display_affinity = 1;
      }

      KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    // Partition may wrap around the end of the place list, hence the two ways
    // of computing its size.
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      // One thread per place: walk consecutive places starting just after the
      // master's place, wrapping inside the partition.
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      // More threads than places: pack S = n_th / n_places threads per place,
      // and give one extra thread to 'rem' places spaced 'gap' apart.
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place full; don't add extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      // The walk must have come full circle back to the master's place.
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    // Partition may wrap around the end of the place list (see close case).
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
        // Carve the partition into n_th sub-partitions of ~S places each;
        // 'rem' sub-partitions spaced 'gap' apart get one extra place.
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          // Each thread is bound to the first place of its sub-partition.
          th->th.th_first_place = place;
          th->th.th_new_place = place;
          if (__kmp_display_affinity && place != th->th.th_current_place &&
              team->t.t_display_affinity != 1) {
            team->t.t_display_affinity = 1;
          }
          s_count = 1;
          // Advance to the last place of this thread's sub-partition.
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            // This sub-partition absorbs one of the leftover places.
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          th->th.th_last_place = place;
          gap_ct++;

          // Step past this sub-partition to the start of the next one.
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, __kmp_affinity_num_masks));
        }
      } else {
        /* Having uniform space of available computation places I can create
           T partitions of round(P/T) size and put threads into the first
           place of each partition. */
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        // One extra iteration (n_th + 1) lets the walk wrap back around so
        // the closing assert can validate it returned to the master's place.
        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            // Sub-partition ran off the end of the place list: wrap it.
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          // Only the first n_th iterations correspond to real threads.
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            th->th.th_first_place = first;
            th->th.th_new_place = place;
            th->th.th_last_place = last;
            if (__kmp_display_affinity && place != th->th.th_current_place &&
                team->t.t_display_affinity != 1) {
              team->t.t_display_affinity = 1;
            }
            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      // More threads than places: each thread's partition shrinks to exactly
      // one place; distribution mirrors the close case above.
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = place;
        th->th.th_last_place = place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED

/* allocate a new team data structure to use.
take one off of the free pool if 48670b57cec5SDimitry Andric available */ 48680b57cec5SDimitry Andric kmp_team_t * 48690b57cec5SDimitry Andric __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 48700b57cec5SDimitry Andric #if OMPT_SUPPORT 48710b57cec5SDimitry Andric ompt_data_t ompt_parallel_data, 48720b57cec5SDimitry Andric #endif 48730b57cec5SDimitry Andric kmp_proc_bind_t new_proc_bind, 48740b57cec5SDimitry Andric kmp_internal_control_t *new_icvs, 48750b57cec5SDimitry Andric int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 48760b57cec5SDimitry Andric KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 48770b57cec5SDimitry Andric int f; 48780b57cec5SDimitry Andric kmp_team_t *team; 48790b57cec5SDimitry Andric int use_hot_team = !root->r.r_active; 48800b57cec5SDimitry Andric int level = 0; 48810b57cec5SDimitry Andric 48820b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: called\n")); 48830b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 48840b57cec5SDimitry Andric KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 48850b57cec5SDimitry Andric KMP_MB(); 48860b57cec5SDimitry Andric 48870b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 48880b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams; 48890b57cec5SDimitry Andric if (master) { 48900b57cec5SDimitry Andric team = master->th.th_team; 48910b57cec5SDimitry Andric level = team->t.t_active_level; 48920b57cec5SDimitry Andric if (master->th.th_teams_microtask) { // in teams construct? 
48930b57cec5SDimitry Andric if (master->th.th_teams_size.nteams > 1 && 48940b57cec5SDimitry Andric ( // #teams > 1 48950b57cec5SDimitry Andric team->t.t_pkfn == 48960b57cec5SDimitry Andric (microtask_t)__kmp_teams_master || // inner fork of the teams 48970b57cec5SDimitry Andric master->th.th_teams_level < 48980b57cec5SDimitry Andric team->t.t_level)) { // or nested parallel inside the teams 48990b57cec5SDimitry Andric ++level; // not increment if #teams==1, or for outer fork of the teams; 49000b57cec5SDimitry Andric // increment otherwise 49010b57cec5SDimitry Andric } 49020b57cec5SDimitry Andric } 49030b57cec5SDimitry Andric hot_teams = master->th.th_hot_teams; 49040b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level && hot_teams && 49050b57cec5SDimitry Andric hot_teams[level] 49060b57cec5SDimitry Andric .hot_team) { // hot team has already been allocated for given level 49070b57cec5SDimitry Andric use_hot_team = 1; 49080b57cec5SDimitry Andric } else { 49090b57cec5SDimitry Andric use_hot_team = 0; 49100b57cec5SDimitry Andric } 49110b57cec5SDimitry Andric } 49120b57cec5SDimitry Andric #endif 49130b57cec5SDimitry Andric // Optimization to use a "hot" team 49140b57cec5SDimitry Andric if (use_hot_team && new_nproc > 1) { 49150b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_nproc <= max_nproc); 49160b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 49170b57cec5SDimitry Andric team = hot_teams[level].hot_team; 49180b57cec5SDimitry Andric #else 49190b57cec5SDimitry Andric team = root->r.r_hot_team; 49200b57cec5SDimitry Andric #endif 49210b57cec5SDimitry Andric #if KMP_DEBUG 49220b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 49230b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 49240b57cec5SDimitry Andric "task_team[1] = %p before reinit\n", 49250b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 49260b57cec5SDimitry Andric } 49270b57cec5SDimitry Andric #endif 49280b57cec5SDimitry 
Andric 49290b57cec5SDimitry Andric // Has the number of threads changed? 49300b57cec5SDimitry Andric /* Let's assume the most common case is that the number of threads is 49310b57cec5SDimitry Andric unchanged, and put that case first. */ 49320b57cec5SDimitry Andric if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 49330b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 49340b57cec5SDimitry Andric // This case can mean that omp_set_num_threads() was called and the hot 49350b57cec5SDimitry Andric // team size was already reduced, so we check the special flag 49360b57cec5SDimitry Andric if (team->t.t_size_changed == -1) { 49370b57cec5SDimitry Andric team->t.t_size_changed = 1; 49380b57cec5SDimitry Andric } else { 49390b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 49400b57cec5SDimitry Andric } 49410b57cec5SDimitry Andric 49420b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 49430b57cec5SDimitry Andric kmp_r_sched_t new_sched = new_icvs->sched; 49440b57cec5SDimitry Andric // set master's schedule as new run-time schedule 49450b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched); 49460b57cec5SDimitry Andric 49470b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, 49480b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 49490b57cec5SDimitry Andric 49500b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 49510b57cec5SDimitry Andric team->t.t_threads[0], team)); 49520b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 49530b57cec5SDimitry Andric 49540b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 49550b57cec5SDimitry Andric if ((team->t.t_size_changed == 0) && 49560b57cec5SDimitry Andric (team->t.t_proc_bind == new_proc_bind)) { 49570b57cec5SDimitry Andric if (new_proc_bind == proc_bind_spread) { 49580b57cec5SDimitry Andric 
__kmp_partition_places( 49590b57cec5SDimitry Andric team, 1); // add flag to update only master for spread 49600b57cec5SDimitry Andric } 49610b57cec5SDimitry Andric KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 49620b57cec5SDimitry Andric "proc_bind = %d, partition = [%d,%d]\n", 49630b57cec5SDimitry Andric team->t.t_id, new_proc_bind, team->t.t_first_place, 49640b57cec5SDimitry Andric team->t.t_last_place)); 49650b57cec5SDimitry Andric } else { 49660b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 49670b57cec5SDimitry Andric __kmp_partition_places(team); 49680b57cec5SDimitry Andric } 49690b57cec5SDimitry Andric #else 49700b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 49710b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 49720b57cec5SDimitry Andric } else if (team->t.t_nproc > new_nproc) { 49730b57cec5SDimitry Andric KA_TRACE(20, 49740b57cec5SDimitry Andric ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 49750b57cec5SDimitry Andric new_nproc)); 49760b57cec5SDimitry Andric 49770b57cec5SDimitry Andric team->t.t_size_changed = 1; 49780b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 49790b57cec5SDimitry Andric if (__kmp_hot_teams_mode == 0) { 49800b57cec5SDimitry Andric // AC: saved number of threads should correspond to team's value in this 49810b57cec5SDimitry Andric // mode, can be bigger in mode 1, when hot team has threads in reserve 49820b57cec5SDimitry Andric KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 49830b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; 49840b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 49850b57cec5SDimitry Andric /* release the extra threads we don't need any more */ 49860b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; f++) { 49870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 49880b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 
49890b57cec5SDimitry Andric // When decreasing team size, threads no longer in the team should 49900b57cec5SDimitry Andric // unref task team. 49910b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_team = NULL; 49920b57cec5SDimitry Andric } 49930b57cec5SDimitry Andric __kmp_free_thread(team->t.t_threads[f]); 49940b57cec5SDimitry Andric team->t.t_threads[f] = NULL; 49950b57cec5SDimitry Andric } 49960b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 49970b57cec5SDimitry Andric } // (__kmp_hot_teams_mode == 0) 49980b57cec5SDimitry Andric else { 49990b57cec5SDimitry Andric // When keeping extra threads in team, switch threads to wait on own 50000b57cec5SDimitry Andric // b_go flag 50010b57cec5SDimitry Andric for (f = new_nproc; f < team->t.t_nproc; ++f) { 50020b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 50030b57cec5SDimitry Andric kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 50040b57cec5SDimitry Andric for (int b = 0; b < bs_last_barrier; ++b) { 50050b57cec5SDimitry Andric if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 50060b57cec5SDimitry Andric balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 50070b57cec5SDimitry Andric } 50080b57cec5SDimitry Andric KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 50090b57cec5SDimitry Andric } 50100b57cec5SDimitry Andric } 50110b57cec5SDimitry Andric } 50120b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 50130b57cec5SDimitry Andric team->t.t_nproc = new_nproc; 50140b57cec5SDimitry Andric // TODO???: team->t.t_max_active_levels = new_max_active_levels; 50150b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); 50160b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, 50170b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 50180b57cec5SDimitry Andric 50190b57cec5SDimitry Andric // Update remaining threads 50200b57cec5SDimitry Andric for (f = 0; f < new_nproc; ++f) { 50210b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc 
= new_nproc; 50220b57cec5SDimitry Andric } 50230b57cec5SDimitry Andric 50240b57cec5SDimitry Andric // restore the current task state of the master thread: should be the 50250b57cec5SDimitry Andric // implicit task 50260b57cec5SDimitry Andric KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 50270b57cec5SDimitry Andric team->t.t_threads[0], team)); 50280b57cec5SDimitry Andric 50290b57cec5SDimitry Andric __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 50300b57cec5SDimitry Andric 50310b57cec5SDimitry Andric #ifdef KMP_DEBUG 50320b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; f++) { 50330b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 50340b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 50350b57cec5SDimitry Andric team->t.t_nproc); 50360b57cec5SDimitry Andric } 50370b57cec5SDimitry Andric #endif 50380b57cec5SDimitry Andric 50390b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 50400b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 50410b57cec5SDimitry Andric __kmp_partition_places(team); 50420b57cec5SDimitry Andric #endif 50430b57cec5SDimitry Andric } else { // team->t.t_nproc < new_nproc 5044489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 50450b57cec5SDimitry Andric kmp_affin_mask_t *old_mask; 50460b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 50470b57cec5SDimitry Andric KMP_CPU_ALLOC(old_mask); 50480b57cec5SDimitry Andric } 50490b57cec5SDimitry Andric #endif 50500b57cec5SDimitry Andric 50510b57cec5SDimitry Andric KA_TRACE(20, 50520b57cec5SDimitry Andric ("__kmp_allocate_team: increasing hot team thread count to %d\n", 50530b57cec5SDimitry Andric new_nproc)); 50540b57cec5SDimitry Andric 50550b57cec5SDimitry Andric team->t.t_size_changed = 1; 50560b57cec5SDimitry Andric 50570b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 50580b57cec5SDimitry Andric int avail_threads = hot_teams[level].hot_team_nth; 
50590b57cec5SDimitry Andric if (new_nproc < avail_threads) 50600b57cec5SDimitry Andric avail_threads = new_nproc; 50610b57cec5SDimitry Andric kmp_info_t **other_threads = team->t.t_threads; 50620b57cec5SDimitry Andric for (f = team->t.t_nproc; f < avail_threads; ++f) { 50630b57cec5SDimitry Andric // Adjust barrier data of reserved threads (if any) of the team 50640b57cec5SDimitry Andric // Other data will be set in __kmp_initialize_info() below. 50650b57cec5SDimitry Andric int b; 50660b57cec5SDimitry Andric kmp_balign_t *balign = other_threads[f]->th.th_bar; 50670b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 50680b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 50690b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 50700b57cec5SDimitry Andric #if USE_DEBUGGER 50710b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 50720b57cec5SDimitry Andric #endif 50730b57cec5SDimitry Andric } 50740b57cec5SDimitry Andric } 50750b57cec5SDimitry Andric if (hot_teams[level].hot_team_nth >= new_nproc) { 50760b57cec5SDimitry Andric // we have all needed threads in reserve, no need to allocate any 50770b57cec5SDimitry Andric // this only possible in mode 1, cannot have reserved threads in mode 0 50780b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 50790b57cec5SDimitry Andric team->t.t_nproc = new_nproc; // just get reserved threads involved 50800b57cec5SDimitry Andric } else { 50810b57cec5SDimitry Andric // we may have some threads in reserve, but not enough 50820b57cec5SDimitry Andric team->t.t_nproc = 50830b57cec5SDimitry Andric hot_teams[level] 50840b57cec5SDimitry Andric .hot_team_nth; // get reserved threads involved if any 50850b57cec5SDimitry Andric hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 50860b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 50870b57cec5SDimitry Andric if (team->t.t_max_nproc < 
new_nproc) { 50880b57cec5SDimitry Andric /* reallocate larger arrays */ 50890b57cec5SDimitry Andric __kmp_reallocate_team_arrays(team, new_nproc); 50900b57cec5SDimitry Andric __kmp_reinitialize_team(team, new_icvs, NULL); 50910b57cec5SDimitry Andric } 50920b57cec5SDimitry Andric 5093489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 50940b57cec5SDimitry Andric /* Temporarily set full mask for master thread before creation of 50950b57cec5SDimitry Andric workers. The reason is that workers inherit the affinity from master, 50960b57cec5SDimitry Andric so if a lot of workers are created on the single core quickly, they 50970b57cec5SDimitry Andric don't get a chance to set their own affinity for a long time. */ 50980b57cec5SDimitry Andric __kmp_set_thread_affinity_mask_full_tmp(old_mask); 50990b57cec5SDimitry Andric #endif 51000b57cec5SDimitry Andric 51010b57cec5SDimitry Andric /* allocate new threads for the hot team */ 51020b57cec5SDimitry Andric for (f = team->t.t_nproc; f < new_nproc; f++) { 51030b57cec5SDimitry Andric kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 51040b57cec5SDimitry Andric KMP_DEBUG_ASSERT(new_worker); 51050b57cec5SDimitry Andric team->t.t_threads[f] = new_worker; 51060b57cec5SDimitry Andric 51070b57cec5SDimitry Andric KA_TRACE(20, 51080b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init T#%d arrived: " 51090b57cec5SDimitry Andric "join=%llu, plain=%llu\n", 51100b57cec5SDimitry Andric team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 51110b57cec5SDimitry Andric team->t.t_bar[bs_forkjoin_barrier].b_arrived, 51120b57cec5SDimitry Andric team->t.t_bar[bs_plain_barrier].b_arrived)); 51130b57cec5SDimitry Andric 51140b57cec5SDimitry Andric { // Initialize barrier data for new threads. 
51150b57cec5SDimitry Andric int b; 51160b57cec5SDimitry Andric kmp_balign_t *balign = new_worker->th.th_bar; 51170b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 51180b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 51190b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 51200b57cec5SDimitry Andric KMP_BARRIER_PARENT_FLAG); 51210b57cec5SDimitry Andric #if USE_DEBUGGER 51220b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 51230b57cec5SDimitry Andric #endif 51240b57cec5SDimitry Andric } 51250b57cec5SDimitry Andric } 51260b57cec5SDimitry Andric } 51270b57cec5SDimitry Andric 5128489b1cf2SDimitry Andric #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 51290b57cec5SDimitry Andric if (KMP_AFFINITY_CAPABLE()) { 51300b57cec5SDimitry Andric /* Restore initial master thread's affinity mask */ 51310b57cec5SDimitry Andric __kmp_set_system_affinity(old_mask, TRUE); 51320b57cec5SDimitry Andric KMP_CPU_FREE(old_mask); 51330b57cec5SDimitry Andric } 51340b57cec5SDimitry Andric #endif 51350b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51360b57cec5SDimitry Andric } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 51370b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 51380b57cec5SDimitry Andric /* make sure everyone is syncronized */ 51390b57cec5SDimitry Andric int old_nproc = team->t.t_nproc; // save old value and use to update only 51400b57cec5SDimitry Andric // new threads below 51410b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, 51420b57cec5SDimitry Andric root->r.r_uber_thread->th.th_ident); 51430b57cec5SDimitry Andric 51440b57cec5SDimitry Andric /* reinitialize the threads */ 51450b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 51460b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) 51470b57cec5SDimitry Andric __kmp_initialize_info(team->t.t_threads[f], team, f, 51480b57cec5SDimitry Andric __kmp_gtid_from_tid(f, team)); 51490b57cec5SDimitry Andric 51500b57cec5SDimitry Andric if (level) { // set th_task_state for new threads in nested hot team 51510b57cec5SDimitry Andric // __kmp_initialize_info() no longer zeroes th_task_state, so we should 51520b57cec5SDimitry Andric // only need to set the th_task_state for the new threads. th_task_state 51530b57cec5SDimitry Andric // for master thread will not be accurate until after this in 51540b57cec5SDimitry Andric // __kmp_fork_call(), so we look to the master's memo_stack to get the 51550b57cec5SDimitry Andric // correct value. 
51560b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 51570b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = 51580b57cec5SDimitry Andric team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 51590b57cec5SDimitry Andric } else { // set th_task_state for new threads in non-nested hot team 51600b57cec5SDimitry Andric int old_state = 51610b57cec5SDimitry Andric team->t.t_threads[0]->th.th_task_state; // copy master's state 51620b57cec5SDimitry Andric for (f = old_nproc; f < team->t.t_nproc; ++f) 51630b57cec5SDimitry Andric team->t.t_threads[f]->th.th_task_state = old_state; 51640b57cec5SDimitry Andric } 51650b57cec5SDimitry Andric 51660b57cec5SDimitry Andric #ifdef KMP_DEBUG 51670b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) { 51680b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f] && 51690b57cec5SDimitry Andric team->t.t_threads[f]->th.th_team_nproc == 51700b57cec5SDimitry Andric team->t.t_nproc); 51710b57cec5SDimitry Andric } 51720b57cec5SDimitry Andric #endif 51730b57cec5SDimitry Andric 51740b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 51750b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 51760b57cec5SDimitry Andric __kmp_partition_places(team); 51770b57cec5SDimitry Andric #endif 51780b57cec5SDimitry Andric } // Check changes in number of threads 51790b57cec5SDimitry Andric 51800b57cec5SDimitry Andric kmp_info_t *master = team->t.t_threads[0]; 51810b57cec5SDimitry Andric if (master->th.th_teams_microtask) { 51820b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 51830b57cec5SDimitry Andric // propagate teams construct specific info to workers 51840b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 51850b57cec5SDimitry Andric thr->th.th_teams_microtask = master->th.th_teams_microtask; 51860b57cec5SDimitry Andric thr->th.th_teams_level = master->th.th_teams_level; 51870b57cec5SDimitry Andric thr->th.th_teams_size = master->th.th_teams_size; 
51880b57cec5SDimitry Andric } 51890b57cec5SDimitry Andric } 51900b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 51910b57cec5SDimitry Andric if (level) { 51920b57cec5SDimitry Andric // Sync barrier state for nested hot teams, not needed for outermost hot 51930b57cec5SDimitry Andric // team. 51940b57cec5SDimitry Andric for (f = 1; f < new_nproc; ++f) { 51950b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 51960b57cec5SDimitry Andric int b; 51970b57cec5SDimitry Andric kmp_balign_t *balign = thr->th.th_bar; 51980b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 51990b57cec5SDimitry Andric balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 52000b57cec5SDimitry Andric KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 52010b57cec5SDimitry Andric #if USE_DEBUGGER 52020b57cec5SDimitry Andric balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 52030b57cec5SDimitry Andric #endif 52040b57cec5SDimitry Andric } 52050b57cec5SDimitry Andric } 52060b57cec5SDimitry Andric } 52070b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 52080b57cec5SDimitry Andric 52090b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 52100b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 52110b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 52120b57cec5SDimitry Andric // The hot team re-uses the previous task team, 52130b57cec5SDimitry Andric // if untouched during the previous release->gather phase. 
52140b57cec5SDimitry Andric 52150b57cec5SDimitry Andric KF_TRACE(10, (" hot_team = %p\n", team)); 52160b57cec5SDimitry Andric 52170b57cec5SDimitry Andric #if KMP_DEBUG 52180b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 52190b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 52200b57cec5SDimitry Andric "task_team[1] = %p after reinit\n", 52210b57cec5SDimitry Andric team->t.t_task_team[0], team->t.t_task_team[1])); 52220b57cec5SDimitry Andric } 52230b57cec5SDimitry Andric #endif 52240b57cec5SDimitry Andric 52250b57cec5SDimitry Andric #if OMPT_SUPPORT 52260b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 52270b57cec5SDimitry Andric #endif 52280b57cec5SDimitry Andric 52290b57cec5SDimitry Andric KMP_MB(); 52300b57cec5SDimitry Andric 52310b57cec5SDimitry Andric return team; 52320b57cec5SDimitry Andric } 52330b57cec5SDimitry Andric 52340b57cec5SDimitry Andric /* next, let's try to take one from the team pool */ 52350b57cec5SDimitry Andric KMP_MB(); 52360b57cec5SDimitry Andric for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) { 52370b57cec5SDimitry Andric /* TODO: consider resizing undersized teams instead of reaping them, now 52380b57cec5SDimitry Andric that we have a resizing mechanism */ 52390b57cec5SDimitry Andric if (team->t.t_max_nproc >= max_nproc) { 52400b57cec5SDimitry Andric /* take this team from the team pool */ 52410b57cec5SDimitry Andric __kmp_team_pool = team->t.t_next_pool; 52420b57cec5SDimitry Andric 52430b57cec5SDimitry Andric /* setup the team for fresh use */ 52440b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 52450b57cec5SDimitry Andric 52460b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 52470b57cec5SDimitry Andric "task_team[1] %p to NULL\n", 52480b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 52490b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; 
52500b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; 52510b57cec5SDimitry Andric 52520b57cec5SDimitry Andric /* reallocate space for arguments if necessary */ 52530b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, TRUE); 52540b57cec5SDimitry Andric KMP_CHECK_UPDATE(team->t.t_argc, argc); 52550b57cec5SDimitry Andric 52560b57cec5SDimitry Andric KA_TRACE( 52570b57cec5SDimitry Andric 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 52580b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 52590b57cec5SDimitry Andric { // Initialize barrier data. 52600b57cec5SDimitry Andric int b; 52610b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 52620b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 52630b57cec5SDimitry Andric #if USE_DEBUGGER 52640b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 52650b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 52660b57cec5SDimitry Andric #endif 52670b57cec5SDimitry Andric } 52680b57cec5SDimitry Andric } 52690b57cec5SDimitry Andric 52700b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 52710b57cec5SDimitry Andric 52720b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 52730b57cec5SDimitry Andric team->t.t_id)); 52740b57cec5SDimitry Andric 52750b57cec5SDimitry Andric #if OMPT_SUPPORT 52760b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 52770b57cec5SDimitry Andric #endif 52780b57cec5SDimitry Andric 52790b57cec5SDimitry Andric KMP_MB(); 52800b57cec5SDimitry Andric 52810b57cec5SDimitry Andric return team; 52820b57cec5SDimitry Andric } 52830b57cec5SDimitry Andric 52840b57cec5SDimitry Andric /* reap team if it is too small, then loop back and check the next one */ 52850b57cec5SDimitry Andric // not sure if this is wise, but, will be redone during the hot-teams 52860b57cec5SDimitry Andric // rewrite. 
52870b57cec5SDimitry Andric /* TODO: Use technique to find the right size hot-team, don't reap them */ 52880b57cec5SDimitry Andric team = __kmp_reap_team(team); 52890b57cec5SDimitry Andric __kmp_team_pool = team; 52900b57cec5SDimitry Andric } 52910b57cec5SDimitry Andric 52920b57cec5SDimitry Andric /* nothing available in the pool, no matter, make a new team! */ 52930b57cec5SDimitry Andric KMP_MB(); 52940b57cec5SDimitry Andric team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 52950b57cec5SDimitry Andric 52960b57cec5SDimitry Andric /* and set it up */ 52970b57cec5SDimitry Andric team->t.t_max_nproc = max_nproc; 52980b57cec5SDimitry Andric /* NOTE well, for some reason allocating one big buffer and dividing it up 52990b57cec5SDimitry Andric seems to really hurt performance a lot on the P4, so, let's not use this */ 53000b57cec5SDimitry Andric __kmp_allocate_team_arrays(team, max_nproc); 53010b57cec5SDimitry Andric 53020b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 53030b57cec5SDimitry Andric __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 53040b57cec5SDimitry Andric 53050b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 53060b57cec5SDimitry Andric "%p to NULL\n", 53070b57cec5SDimitry Andric &team->t.t_task_team[0], &team->t.t_task_team[1])); 53080b57cec5SDimitry Andric team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 53090b57cec5SDimitry Andric // memory, no need to duplicate 53100b57cec5SDimitry Andric team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 53110b57cec5SDimitry Andric // memory, no need to duplicate 53120b57cec5SDimitry Andric 53130b57cec5SDimitry Andric if (__kmp_storage_map) { 53140b57cec5SDimitry Andric __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 53150b57cec5SDimitry Andric } 53160b57cec5SDimitry Andric 53170b57cec5SDimitry Andric /* allocate space for arguments */ 
53180b57cec5SDimitry Andric __kmp_alloc_argv_entries(argc, team, FALSE); 53190b57cec5SDimitry Andric team->t.t_argc = argc; 53200b57cec5SDimitry Andric 53210b57cec5SDimitry Andric KA_TRACE(20, 53220b57cec5SDimitry Andric ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 53230b57cec5SDimitry Andric team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 53240b57cec5SDimitry Andric { // Initialize barrier data. 53250b57cec5SDimitry Andric int b; 53260b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 53270b57cec5SDimitry Andric team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 53280b57cec5SDimitry Andric #if USE_DEBUGGER 53290b57cec5SDimitry Andric team->t.t_bar[b].b_master_arrived = 0; 53300b57cec5SDimitry Andric team->t.t_bar[b].b_team_arrived = 0; 53310b57cec5SDimitry Andric #endif 53320b57cec5SDimitry Andric } 53330b57cec5SDimitry Andric } 53340b57cec5SDimitry Andric 53350b57cec5SDimitry Andric team->t.t_proc_bind = new_proc_bind; 53360b57cec5SDimitry Andric 53370b57cec5SDimitry Andric #if OMPT_SUPPORT 53380b57cec5SDimitry Andric __ompt_team_assign_id(team, ompt_parallel_data); 53390b57cec5SDimitry Andric team->t.ompt_serialized_team_info = NULL; 53400b57cec5SDimitry Andric #endif 53410b57cec5SDimitry Andric 53420b57cec5SDimitry Andric KMP_MB(); 53430b57cec5SDimitry Andric 53440b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 53450b57cec5SDimitry Andric team->t.t_id)); 53460b57cec5SDimitry Andric 53470b57cec5SDimitry Andric return team; 53480b57cec5SDimitry Andric } 53490b57cec5SDimitry Andric 53500b57cec5SDimitry Andric /* TODO implement hot-teams at all levels */ 53510b57cec5SDimitry Andric /* TODO implement lazy thread release on demand (disband request) */ 53520b57cec5SDimitry Andric 53530b57cec5SDimitry Andric /* free the team. return it to the team pool. 
release all the threads 53540b57cec5SDimitry Andric * associated with it */ 53550b57cec5SDimitry Andric void __kmp_free_team(kmp_root_t *root, 53560b57cec5SDimitry Andric kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { 53570b57cec5SDimitry Andric int f; 53580b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), 53590b57cec5SDimitry Andric team->t.t_id)); 53600b57cec5SDimitry Andric 53610b57cec5SDimitry Andric /* verify state */ 53620b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root); 53630b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 53640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc); 53650b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 53660b57cec5SDimitry Andric 53670b57cec5SDimitry Andric int use_hot_team = team == root->r.r_hot_team; 53680b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 53690b57cec5SDimitry Andric int level; 53700b57cec5SDimitry Andric kmp_hot_team_ptr_t *hot_teams; 53710b57cec5SDimitry Andric if (master) { 53720b57cec5SDimitry Andric level = team->t.t_active_level - 1; 53730b57cec5SDimitry Andric if (master->th.th_teams_microtask) { // in teams construct? 
53740b57cec5SDimitry Andric if (master->th.th_teams_size.nteams > 1) { 53750b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 53760b57cec5SDimitry Andric // team_of_masters 53770b57cec5SDimitry Andric } 53780b57cec5SDimitry Andric if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 53790b57cec5SDimitry Andric master->th.th_teams_level == team->t.t_level) { 53800b57cec5SDimitry Andric ++level; // level was not increased in teams construct for 53810b57cec5SDimitry Andric // team_of_workers before the parallel 53820b57cec5SDimitry Andric } // team->t.t_level will be increased inside parallel 53830b57cec5SDimitry Andric } 53840b57cec5SDimitry Andric hot_teams = master->th.th_hot_teams; 53850b57cec5SDimitry Andric if (level < __kmp_hot_teams_max_level) { 53860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team); 53870b57cec5SDimitry Andric use_hot_team = 1; 53880b57cec5SDimitry Andric } 53890b57cec5SDimitry Andric } 53900b57cec5SDimitry Andric #endif // KMP_NESTED_HOT_TEAMS 53910b57cec5SDimitry Andric 53920b57cec5SDimitry Andric /* team is done working */ 53930b57cec5SDimitry Andric TCW_SYNC_PTR(team->t.t_pkfn, 53940b57cec5SDimitry Andric NULL); // Important for Debugging Support Library. 53950b57cec5SDimitry Andric #if KMP_OS_WINDOWS 53960b57cec5SDimitry Andric team->t.t_copyin_counter = 0; // init counter for possible reuse 53970b57cec5SDimitry Andric #endif 53980b57cec5SDimitry Andric // Do not reset pointer to parent team to NULL for hot teams. 
53990b57cec5SDimitry Andric 54000b57cec5SDimitry Andric /* if we are non-hot team, release our threads */ 54010b57cec5SDimitry Andric if (!use_hot_team) { 54020b57cec5SDimitry Andric if (__kmp_tasking_mode != tskm_immediate_exec) { 54030b57cec5SDimitry Andric // Wait for threads to reach reapable state 54040b57cec5SDimitry Andric for (f = 1; f < team->t.t_nproc; ++f) { 54050b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 54060b57cec5SDimitry Andric kmp_info_t *th = team->t.t_threads[f]; 54070b57cec5SDimitry Andric volatile kmp_uint32 *state = &th->th.th_reap_state; 54080b57cec5SDimitry Andric while (*state != KMP_SAFE_TO_REAP) { 54090b57cec5SDimitry Andric #if KMP_OS_WINDOWS 54100b57cec5SDimitry Andric // On Windows a thread can be killed at any time, check this 54110b57cec5SDimitry Andric DWORD ecode; 54120b57cec5SDimitry Andric if (!__kmp_is_thread_alive(th, &ecode)) { 54130b57cec5SDimitry Andric *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread 54140b57cec5SDimitry Andric break; 54150b57cec5SDimitry Andric } 54160b57cec5SDimitry Andric #endif 54170b57cec5SDimitry Andric // first check if thread is sleeping 54180b57cec5SDimitry Andric kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); 54190b57cec5SDimitry Andric if (fl.is_sleeping()) 54200b57cec5SDimitry Andric fl.resume(__kmp_gtid_from_thread(th)); 54210b57cec5SDimitry Andric KMP_CPU_PAUSE(); 54220b57cec5SDimitry Andric } 54230b57cec5SDimitry Andric } 54240b57cec5SDimitry Andric 54250b57cec5SDimitry Andric // Delete task teams 54260b57cec5SDimitry Andric int tt_idx; 54270b57cec5SDimitry Andric for (tt_idx = 0; tt_idx < 2; ++tt_idx) { 54280b57cec5SDimitry Andric kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; 54290b57cec5SDimitry Andric if (task_team != NULL) { 54300b57cec5SDimitry Andric for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams 54310b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 54320b57cec5SDimitry Andric 
team->t.t_threads[f]->th.th_task_team = NULL; 54330b57cec5SDimitry Andric } 54340b57cec5SDimitry Andric KA_TRACE( 54350b57cec5SDimitry Andric 20, 54360b57cec5SDimitry Andric ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", 54370b57cec5SDimitry Andric __kmp_get_gtid(), task_team, team->t.t_id)); 54380b57cec5SDimitry Andric #if KMP_NESTED_HOT_TEAMS 54390b57cec5SDimitry Andric __kmp_free_task_team(master, task_team); 54400b57cec5SDimitry Andric #endif 54410b57cec5SDimitry Andric team->t.t_task_team[tt_idx] = NULL; 54420b57cec5SDimitry Andric } 54430b57cec5SDimitry Andric } 54440b57cec5SDimitry Andric } 54450b57cec5SDimitry Andric 54460b57cec5SDimitry Andric // Reset pointer to parent team only for non-hot teams. 54470b57cec5SDimitry Andric team->t.t_parent = NULL; 54480b57cec5SDimitry Andric team->t.t_level = 0; 54490b57cec5SDimitry Andric team->t.t_active_level = 0; 54500b57cec5SDimitry Andric 54510b57cec5SDimitry Andric /* free the worker threads */ 54520b57cec5SDimitry Andric for (f = 1; f < team->t.t_nproc; ++f) { 54530b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[f]); 54540b57cec5SDimitry Andric __kmp_free_thread(team->t.t_threads[f]); 54550b57cec5SDimitry Andric team->t.t_threads[f] = NULL; 54560b57cec5SDimitry Andric } 54570b57cec5SDimitry Andric 54580b57cec5SDimitry Andric /* put the team back in the team pool */ 54590b57cec5SDimitry Andric /* TODO limit size of team pool, call reap_team if pool too large */ 54600b57cec5SDimitry Andric team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool); 54610b57cec5SDimitry Andric __kmp_team_pool = (volatile kmp_team_t *)team; 54620b57cec5SDimitry Andric } else { // Check if team was created for the masters in a teams construct 54630b57cec5SDimitry Andric // See if first worker is a CG root 54640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads[1] && 54650b57cec5SDimitry Andric team->t.t_threads[1]->th.th_cg_roots); 54660b57cec5SDimitry Andric if 
(team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) { 54670b57cec5SDimitry Andric // Clean up the CG root nodes on workers so that this team can be re-used 54680b57cec5SDimitry Andric for (f = 1; f < team->t.t_nproc; ++f) { 54690b57cec5SDimitry Andric kmp_info_t *thr = team->t.t_threads[f]; 54700b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots && 54710b57cec5SDimitry Andric thr->th.th_cg_roots->cg_root == thr); 54720b57cec5SDimitry Andric // Pop current CG root off list 54730b57cec5SDimitry Andric kmp_cg_root_t *tmp = thr->th.th_cg_roots; 54740b57cec5SDimitry Andric thr->th.th_cg_roots = tmp->up; 54750b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving" 54760b57cec5SDimitry Andric " up to node %p. cg_nthreads was %d\n", 54770b57cec5SDimitry Andric thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads)); 54780b57cec5SDimitry Andric int i = tmp->cg_nthreads--; 54790b57cec5SDimitry Andric if (i == 1) { 54800b57cec5SDimitry Andric __kmp_free(tmp); // free CG if we are the last thread in it 54810b57cec5SDimitry Andric } 54820b57cec5SDimitry Andric // Restore current task's thread_limit from CG root 54830b57cec5SDimitry Andric if (thr->th.th_cg_roots) 54840b57cec5SDimitry Andric thr->th.th_current_task->td_icvs.thread_limit = 54850b57cec5SDimitry Andric thr->th.th_cg_roots->cg_thread_limit; 54860b57cec5SDimitry Andric } 54870b57cec5SDimitry Andric } 54880b57cec5SDimitry Andric } 54890b57cec5SDimitry Andric 54900b57cec5SDimitry Andric KMP_MB(); 54910b57cec5SDimitry Andric } 54920b57cec5SDimitry Andric 54930b57cec5SDimitry Andric /* reap the team. 
destroy it, reclaim all its resources and free its memory */ 54940b57cec5SDimitry Andric kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 54950b57cec5SDimitry Andric kmp_team_t *next_pool = team->t.t_next_pool; 54960b57cec5SDimitry Andric 54970b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team); 54980b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 54990b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 55000b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_threads); 55010b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_argv); 55020b57cec5SDimitry Andric 55030b57cec5SDimitry Andric /* TODO clean the threads that are a part of this? */ 55040b57cec5SDimitry Andric 55050b57cec5SDimitry Andric /* free stuff */ 55060b57cec5SDimitry Andric __kmp_free_team_arrays(team); 55070b57cec5SDimitry Andric if (team->t.t_argv != &team->t.t_inline_argv[0]) 55080b57cec5SDimitry Andric __kmp_free((void *)team->t.t_argv); 55090b57cec5SDimitry Andric __kmp_free(team); 55100b57cec5SDimitry Andric 55110b57cec5SDimitry Andric KMP_MB(); 55120b57cec5SDimitry Andric return next_pool; 55130b57cec5SDimitry Andric } 55140b57cec5SDimitry Andric 55150b57cec5SDimitry Andric // Free the thread. Don't reap it, just place it on the pool of available 55160b57cec5SDimitry Andric // threads. 55170b57cec5SDimitry Andric // 55180b57cec5SDimitry Andric // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 55190b57cec5SDimitry Andric // binding for the affinity mechanism to be useful. 55200b57cec5SDimitry Andric // 55210b57cec5SDimitry Andric // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 55220b57cec5SDimitry Andric // However, we want to avoid a potential performance problem by always 55230b57cec5SDimitry Andric // scanning through the list to find the correct point at which to insert 55240b57cec5SDimitry Andric // the thread (potential N**2 behavior). 
To do this we keep track of the 55250b57cec5SDimitry Andric // last place a thread struct was inserted (__kmp_thread_pool_insert_pt). 55260b57cec5SDimitry Andric // With single-level parallelism, threads will always be added to the tail 55270b57cec5SDimitry Andric // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested 55280b57cec5SDimitry Andric // parallelism, all bets are off and we may need to scan through the entire 55290b57cec5SDimitry Andric // free list. 55300b57cec5SDimitry Andric // 55310b57cec5SDimitry Andric // This change also has a potentially large performance benefit, for some 55320b57cec5SDimitry Andric // applications. Previously, as threads were freed from the hot team, they 55330b57cec5SDimitry Andric // would be placed back on the free list in inverse order. If the hot team 55340b57cec5SDimitry Andric // grew back to it's original size, then the freed thread would be placed 55350b57cec5SDimitry Andric // back on the hot team in reverse order. This could cause bad cache 55360b57cec5SDimitry Andric // locality problems on programs where the size of the hot team regularly 55370b57cec5SDimitry Andric // grew and shrunk. 55380b57cec5SDimitry Andric // 55390b57cec5SDimitry Andric // Now, for single-level parallelism, the OMP tid is alway == gtid. 55400b57cec5SDimitry Andric void __kmp_free_thread(kmp_info_t *this_th) { 55410b57cec5SDimitry Andric int gtid; 55420b57cec5SDimitry Andric kmp_info_t **scan; 55430b57cec5SDimitry Andric 55440b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", 55450b57cec5SDimitry Andric __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid)); 55460b57cec5SDimitry Andric 55470b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_th); 55480b57cec5SDimitry Andric 55490b57cec5SDimitry Andric // When moving thread to pool, switch thread to wait on own b_go flag, and 55500b57cec5SDimitry Andric // uninitialized (NULL team). 
55510b57cec5SDimitry Andric int b; 55520b57cec5SDimitry Andric kmp_balign_t *balign = this_th->th.th_bar; 55530b57cec5SDimitry Andric for (b = 0; b < bs_last_barrier; ++b) { 55540b57cec5SDimitry Andric if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) 55550b57cec5SDimitry Andric balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 55560b57cec5SDimitry Andric balign[b].bb.team = NULL; 55570b57cec5SDimitry Andric balign[b].bb.leaf_kids = 0; 55580b57cec5SDimitry Andric } 55590b57cec5SDimitry Andric this_th->th.th_task_state = 0; 55600b57cec5SDimitry Andric this_th->th.th_reap_state = KMP_SAFE_TO_REAP; 55610b57cec5SDimitry Andric 55620b57cec5SDimitry Andric /* put thread back on the free pool */ 55630b57cec5SDimitry Andric TCW_PTR(this_th->th.th_team, NULL); 55640b57cec5SDimitry Andric TCW_PTR(this_th->th.th_root, NULL); 55650b57cec5SDimitry Andric TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ 55660b57cec5SDimitry Andric 55670b57cec5SDimitry Andric while (this_th->th.th_cg_roots) { 55680b57cec5SDimitry Andric this_th->th.th_cg_roots->cg_nthreads--; 55690b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node" 55700b57cec5SDimitry Andric " %p of thread %p to %d\n", 55710b57cec5SDimitry Andric this_th, this_th->th.th_cg_roots, 55720b57cec5SDimitry Andric this_th->th.th_cg_roots->cg_root, 55730b57cec5SDimitry Andric this_th->th.th_cg_roots->cg_nthreads)); 55740b57cec5SDimitry Andric kmp_cg_root_t *tmp = this_th->th.th_cg_roots; 55750b57cec5SDimitry Andric if (tmp->cg_root == this_th) { // Thread is a cg_root 55760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0); 55770b57cec5SDimitry Andric KA_TRACE( 55780b57cec5SDimitry Andric 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp)); 55790b57cec5SDimitry Andric this_th->th.th_cg_roots = tmp->up; 55800b57cec5SDimitry Andric __kmp_free(tmp); 55810b57cec5SDimitry Andric } else { // Worker thread 55820b57cec5SDimitry Andric if 
(tmp->cg_nthreads == 0) { // last thread leaves contention group 55830b57cec5SDimitry Andric __kmp_free(tmp); 55840b57cec5SDimitry Andric } 55850b57cec5SDimitry Andric this_th->th.th_cg_roots = NULL; 55860b57cec5SDimitry Andric break; 55870b57cec5SDimitry Andric } 55880b57cec5SDimitry Andric } 55890b57cec5SDimitry Andric 55900b57cec5SDimitry Andric /* If the implicit task assigned to this thread can be used by other threads 55910b57cec5SDimitry Andric * -> multiple threads can share the data and try to free the task at 55920b57cec5SDimitry Andric * __kmp_reap_thread at exit. This duplicate use of the task data can happen 55930b57cec5SDimitry Andric * with higher probability when hot team is disabled but can occurs even when 55940b57cec5SDimitry Andric * the hot team is enabled */ 55950b57cec5SDimitry Andric __kmp_free_implicit_task(this_th); 55960b57cec5SDimitry Andric this_th->th.th_current_task = NULL; 55970b57cec5SDimitry Andric 55980b57cec5SDimitry Andric // If the __kmp_thread_pool_insert_pt is already past the new insert 55990b57cec5SDimitry Andric // point, then we need to re-scan the entire list. 56000b57cec5SDimitry Andric gtid = this_th->th.th_info.ds.ds_gtid; 56010b57cec5SDimitry Andric if (__kmp_thread_pool_insert_pt != NULL) { 56020b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL); 56030b57cec5SDimitry Andric if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) { 56040b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = NULL; 56050b57cec5SDimitry Andric } 56060b57cec5SDimitry Andric } 56070b57cec5SDimitry Andric 56080b57cec5SDimitry Andric // Scan down the list to find the place to insert the thread. 56090b57cec5SDimitry Andric // scan is the address of a link in the list, possibly the address of 56100b57cec5SDimitry Andric // __kmp_thread_pool itself. 56110b57cec5SDimitry Andric // 56120b57cec5SDimitry Andric // In the absence of nested parallism, the for loop will have 0 iterations. 
56130b57cec5SDimitry Andric if (__kmp_thread_pool_insert_pt != NULL) { 56140b57cec5SDimitry Andric scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool); 56150b57cec5SDimitry Andric } else { 56160b57cec5SDimitry Andric scan = CCAST(kmp_info_t **, &__kmp_thread_pool); 56170b57cec5SDimitry Andric } 56180b57cec5SDimitry Andric for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid); 56190b57cec5SDimitry Andric scan = &((*scan)->th.th_next_pool)) 56200b57cec5SDimitry Andric ; 56210b57cec5SDimitry Andric 56220b57cec5SDimitry Andric // Insert the new element on the list, and set __kmp_thread_pool_insert_pt 56230b57cec5SDimitry Andric // to its address. 56240b57cec5SDimitry Andric TCW_PTR(this_th->th.th_next_pool, *scan); 56250b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = *scan = this_th; 56260b57cec5SDimitry Andric KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) || 56270b57cec5SDimitry Andric (this_th->th.th_info.ds.ds_gtid < 56280b57cec5SDimitry Andric this_th->th.th_next_pool->th.th_info.ds.ds_gtid)); 56290b57cec5SDimitry Andric TCW_4(this_th->th.th_in_pool, TRUE); 56300b57cec5SDimitry Andric __kmp_suspend_initialize_thread(this_th); 56310b57cec5SDimitry Andric __kmp_lock_suspend_mx(this_th); 56320b57cec5SDimitry Andric if (this_th->th.th_active == TRUE) { 56330b57cec5SDimitry Andric KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth); 56340b57cec5SDimitry Andric this_th->th.th_active_in_pool = TRUE; 56350b57cec5SDimitry Andric } 56360b57cec5SDimitry Andric #if KMP_DEBUG 56370b57cec5SDimitry Andric else { 56380b57cec5SDimitry Andric KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE); 56390b57cec5SDimitry Andric } 56400b57cec5SDimitry Andric #endif 56410b57cec5SDimitry Andric __kmp_unlock_suspend_mx(this_th); 56420b57cec5SDimitry Andric 56430b57cec5SDimitry Andric TCW_4(__kmp_nth, __kmp_nth - 1); 56440b57cec5SDimitry Andric 56450b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 56460b57cec5SDimitry Andric /* Adjust blocktime back to user setting 
or default if necessary */ 56470b57cec5SDimitry Andric /* Middle initialization might never have occurred */ 56480b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 56490b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 56500b57cec5SDimitry Andric if (__kmp_nth <= __kmp_avail_proc) { 56510b57cec5SDimitry Andric __kmp_zero_bt = FALSE; 56520b57cec5SDimitry Andric } 56530b57cec5SDimitry Andric } 56540b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 56550b57cec5SDimitry Andric 56560b57cec5SDimitry Andric KMP_MB(); 56570b57cec5SDimitry Andric } 56580b57cec5SDimitry Andric 56590b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 56600b57cec5SDimitry Andric 56610b57cec5SDimitry Andric void *__kmp_launch_thread(kmp_info_t *this_thr) { 56620b57cec5SDimitry Andric int gtid = this_thr->th.th_info.ds.ds_gtid; 56630b57cec5SDimitry Andric /* void *stack_data;*/ 5664489b1cf2SDimitry Andric kmp_team_t **volatile pteam; 56650b57cec5SDimitry Andric 56660b57cec5SDimitry Andric KMP_MB(); 56670b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); 56680b57cec5SDimitry Andric 56690b57cec5SDimitry Andric if (__kmp_env_consistency_check) { 56700b57cec5SDimitry Andric this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? 
  }

#if OMPT_SUPPORT
  ompt_data_t *thread_data;
  if (ompt_enabled.enabled) {
    // Announce this worker to the tool and park in the "idle" state until
    // work arrives.
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        // Run the team's microtask; a zero return is treated as fatal.
        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPT_SUPPORT
  // NOTE(review): thread_data is only initialized above when
  // ompt_enabled.enabled was true at thread start — assumes
  // ompt_callback_thread_end is never set without ompt_enabled.enabled;
  // verify against the OMPT init path.
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();
  return this_thr;
}

/* ------------------------------------------------------------------------ */

// TLS destructor entry point: recover the gtid stored in thread-specific
// data and shut down this thread's runtime state.
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  // Make sure no significant bits are lost
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  /* josh: One reason for setting the gtid specific data even when it is being
     destroyed by pthread is to allow gtid lookup through thread specific data
     (__kmp_gtid_get_specific). Some of the code, especially stat code,
     that gets executed in the call to __kmp_internal_end_thread, actually
     gets the gtid through the thread specific data. Setting it here seems
     rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
     to run smoothly.
     todo: get rid of this after we remove the dependence on
     __kmp_gtid_get_specific */
  if (gtid >= 0 && KMP_UBER_GTID(gtid))
    __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases
// destructors work perfectly, but in real libomp.so I have no evidence it is
// ever called. However, -fini linker option in makefile.mk works fine.

// ELF destructor: runs when the shared library is unloaded.
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

// -fini entry point; same effect as the destructor above.
void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  /* [Windows]
     josh: ideally, we want to completely shutdown the library in this atexit
     handler, but stat code that depends on thread specific data for gtid fails
     because that data becomes unavailable at some point during the shutdown, so
     we call __kmp_internal_end_thread instead. We should eventually remove the
     dependency on __kmp_get_specific_gtid in the stat code and use
     __kmp_internal_end_library to cleanly shutdown the library.

     // TODO: Can some of this comment about GVS be removed?
     I suspect that the offending stat code is executed when the calling thread
     tries to clean up a dead root thread's data structures, resulting in GVS
     code trying to close the GVS structures for that thread, but since the stat
     code uses __kmp_get_specific_gtid to get the gtid with the assumption that
     the calling thread is cleaning up itself instead of another thread, it gets
     confused. This happens because allowing a thread to unregister and cleanup
     another thread is a recent modification for addressing an issue.
     Based on the current design (20050722), a thread may end up
     trying to unregister another thread only if thread death does not trigger
     the calling of __kmp_internal_end_thread. For Linux* OS, there is the
     thread specific data destructor function to detect thread death. For
     Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
     is nothing. Thus, the workaround is applicable only for Windows static
     stat library. */
  // -1 == "gtid unknown"; __kmp_internal_end_library resolves the caller.
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}

// Fully tear down one thread: release it from the fork barrier, reap the OS
// thread (workers only), then free all per-thread runtime resources.
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      /* Need release fence here to prevent seg faults for tree forkjoin barrier
       * (GEH) */
      ANNOTATE_HAPPENS_BEFORE(thread);
      kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
      __kmp_release_64(&flag);
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously. If it was actively
    // spinning in the thread pool, decrement the global count.
    //
    // There is a small timing hole here - if the worker thread was just waking
    // up after sleeping in the pool, had reset it's th_active_in_pool flag but
    // not decremented the global counter __kmp_thread_pool_active_nth yet, then
    // the global counter might not get updated.
    //
    // Currently, this can only happen as the library is unloaded,
    // so there are no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

  // Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  // Unpublish the thread from the global thread table before freeing it.
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread is added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  // Each thread owns a serialized team; reclaim it along with the thread.
  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();

} // __kmp_reap_thread

// Final library shutdown: unregister, signal g_done, and (when no root is
// still active) reap pooled threads, teams, and task teams.
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies.
*/ 59720b57cec5SDimitry Andric __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of 59730b57cec5SDimitry Andric // dead roots 59740b57cec5SDimitry Andric #endif 59750b57cec5SDimitry Andric 59760b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) 59770b57cec5SDimitry Andric if (__kmp_root[i]) 59780b57cec5SDimitry Andric if (__kmp_root[i]->r.r_active) 59790b57cec5SDimitry Andric break; 59800b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 59810b57cec5SDimitry Andric TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 59820b57cec5SDimitry Andric 59830b57cec5SDimitry Andric if (i < __kmp_threads_capacity) { 59840b57cec5SDimitry Andric #if KMP_USE_MONITOR 59850b57cec5SDimitry Andric // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? 59860b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 59870b57cec5SDimitry Andric 59880b57cec5SDimitry Andric // Need to check that monitor was initialized before reaping it. If we are 59890b57cec5SDimitry Andric // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then 59900b57cec5SDimitry Andric // __kmp_monitor will appear to contain valid data, but it is only valid in 59910b57cec5SDimitry Andric // the parent process, not the child. 59920b57cec5SDimitry Andric // New behavior (201008): instead of keying off of the flag 59930b57cec5SDimitry Andric // __kmp_init_parallel, the monitor thread creation is keyed off 59940b57cec5SDimitry Andric // of the new flag __kmp_init_monitor. 
59950b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 59960b57cec5SDimitry Andric if (TCR_4(__kmp_init_monitor)) { 59970b57cec5SDimitry Andric __kmp_reap_monitor(&__kmp_monitor); 59980b57cec5SDimitry Andric TCW_4(__kmp_init_monitor, 0); 59990b57cec5SDimitry Andric } 60000b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 60010b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 60020b57cec5SDimitry Andric #endif // KMP_USE_MONITOR 60030b57cec5SDimitry Andric } else { 60040b57cec5SDimitry Andric /* TODO move this to cleanup code */ 60050b57cec5SDimitry Andric #ifdef KMP_DEBUG 60060b57cec5SDimitry Andric /* make sure that everything has properly ended */ 60070b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 60080b57cec5SDimitry Andric if (__kmp_root[i]) { 60090b57cec5SDimitry Andric // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: 60100b57cec5SDimitry Andric // there can be uber threads alive here 60110b57cec5SDimitry Andric KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active? 60120b57cec5SDimitry Andric } 60130b57cec5SDimitry Andric } 60140b57cec5SDimitry Andric #endif 60150b57cec5SDimitry Andric 60160b57cec5SDimitry Andric KMP_MB(); 60170b57cec5SDimitry Andric 60180b57cec5SDimitry Andric // Reap the worker threads. 60190b57cec5SDimitry Andric // This is valid for now, but be careful if threads are reaped sooner. 60200b57cec5SDimitry Andric while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool. 60210b57cec5SDimitry Andric // Get the next thread from the pool. 60220b57cec5SDimitry Andric kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool); 60230b57cec5SDimitry Andric __kmp_thread_pool = thread->th.th_next_pool; 60240b57cec5SDimitry Andric // Reap it. 
60250b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); 60260b57cec5SDimitry Andric thread->th.th_next_pool = NULL; 60270b57cec5SDimitry Andric thread->th.th_in_pool = FALSE; 60280b57cec5SDimitry Andric __kmp_reap_thread(thread, 0); 60290b57cec5SDimitry Andric } 60300b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = NULL; 60310b57cec5SDimitry Andric 60320b57cec5SDimitry Andric // Reap teams. 60330b57cec5SDimitry Andric while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool. 60340b57cec5SDimitry Andric // Get the next team from the pool. 60350b57cec5SDimitry Andric kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool); 60360b57cec5SDimitry Andric __kmp_team_pool = team->t.t_next_pool; 60370b57cec5SDimitry Andric // Reap it. 60380b57cec5SDimitry Andric team->t.t_next_pool = NULL; 60390b57cec5SDimitry Andric __kmp_reap_team(team); 60400b57cec5SDimitry Andric } 60410b57cec5SDimitry Andric 60420b57cec5SDimitry Andric __kmp_reap_task_teams(); 60430b57cec5SDimitry Andric 60440b57cec5SDimitry Andric #if KMP_OS_UNIX 60450b57cec5SDimitry Andric // Threads that are not reaped should not access any resources since they 60460b57cec5SDimitry Andric // are going to be deallocated soon, so the shutdown sequence should wait 60470b57cec5SDimitry Andric // until all threads either exit the final spin-waiting loop or begin 60480b57cec5SDimitry Andric // sleeping after the given blocktime. 60490b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 60500b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[i]; 60510b57cec5SDimitry Andric while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking)) 60520b57cec5SDimitry Andric KMP_CPU_PAUSE(); 60530b57cec5SDimitry Andric } 60540b57cec5SDimitry Andric #endif 60550b57cec5SDimitry Andric 60560b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; ++i) { 60570b57cec5SDimitry Andric // TBD: Add some checking... 
60580b57cec5SDimitry Andric // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); 60590b57cec5SDimitry Andric } 60600b57cec5SDimitry Andric 60610b57cec5SDimitry Andric /* Make sure all threadprivate destructors get run by joining with all 60620b57cec5SDimitry Andric worker threads before resetting this flag */ 60630b57cec5SDimitry Andric TCW_SYNC_4(__kmp_init_common, FALSE); 60640b57cec5SDimitry Andric 60650b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n")); 60660b57cec5SDimitry Andric KMP_MB(); 60670b57cec5SDimitry Andric 60680b57cec5SDimitry Andric #if KMP_USE_MONITOR 60690b57cec5SDimitry Andric // See note above: One of the possible fixes for CQ138434 / CQ140126 60700b57cec5SDimitry Andric // 60710b57cec5SDimitry Andric // FIXME: push both code fragments down and CSE them? 60720b57cec5SDimitry Andric // push them into __kmp_cleanup() ? 60730b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 60740b57cec5SDimitry Andric if (TCR_4(__kmp_init_monitor)) { 60750b57cec5SDimitry Andric __kmp_reap_monitor(&__kmp_monitor); 60760b57cec5SDimitry Andric TCW_4(__kmp_init_monitor, 0); 60770b57cec5SDimitry Andric } 60780b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 60790b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 60800b57cec5SDimitry Andric #endif 60810b57cec5SDimitry Andric } /* else !__kmp_global.t_active */ 60820b57cec5SDimitry Andric TCW_4(__kmp_init_gtid, FALSE); 60830b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/
  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}

/* Library-level shutdown entry point (e.g. library destructor / process
   teardown path).  gtid_req is the caller's global thread id if already
   known, or a negative value meaning "look it up".  Unregisters an uber
   thread, or -- when the caller is unknown or DNE -- still tears down the
   whole runtime via __kmp_internal_end() under the initz and forkjoin
   bootstrap locks.  Worker threads only dump the debug buffer and return. */
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    // Resolve the caller's gtid; a negative gtid_req means "unknown, query
    // the thread-specific key" (which may yield a sentinel, handled below).
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // Root still has an active parallel region: flag abort and bail out
        // rather than tearing the runtime down underneath it.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* worker threads may call this function through the atexit handler, if
       * they call exit() */
      /* For now, skip the usual subsequent processing and just dump the debug
         buffer.
         TODO: do a thorough shutdown instead */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  // NOTE: release order is the reverse of acquisition (forkjoin, then initz).
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();

} // __kmp_internal_end_library

void
/* Per-thread shutdown entry point: like __kmp_internal_end_library(), but the
   runtime is only torn down when no uber (root) threads remain; a worker just
   detaches its task team and returns.  Under KMP_DYNAMIC_LIB, full shutdown is
   deferred to the library destructor unless the runtime is hard-paused. */
__kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
   * only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // 2009-09-06: We do not set g_abort without setting g_done. This check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    // Negative gtid_req means the caller does not know its gtid; query the
    // thread-specific key (which may return a shutdown/monitor/DNE sentinel).
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread. gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        // Root still has an active parallel region: flag abort and bail out.
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        // Detach from the task team; the rest of the teardown is left to the
        // uber thread / library destructor.
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: lets not shutdown the dynamic library at the exit of uber thread,
  // because we will better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */

  /* should we finish the run-time? are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  // If any uber thread is still registered, the runtime must stay alive;
  // release both locks (reverse order of acquisition) and return.
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
} // __kmp_internal_end_thread

// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

// Build the per-process registration env var name, "__KMP_REGISTERED_LIB_<pid>".
// Caller owns the returned string (allocated by __kmp_str_format).
static inline char *__kmp_reg_status_name() {
  /* On RHEL 3u5 if linked statically, getpid() returns different values in
     each thread. If registration and unregistration go in different threads
     (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env
     env var can not be found, because the name will contain different pid. */
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
} // __kmp_reg_status_get

/* Register this copy of the runtime in the environment so that a second copy
   loaded into the same process can be detected.  The env var value encodes
   "<address of __kmp_registration_flag>-<flag value>-<library file>".  If the
   variable is already set, the previously registered copy is probed for
   liveness by checking whether the encoded address is still mapped and holds
   the encoded value; a dead neighbor's variable is cleared and registration is
   retried, a live one is a fatal error unless KMP_DUPLICATE_LIB_OK is set. */
void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  // Union lets the low bits of the wall-clock reading be used as a cheap
  // per-process random tag without a separate conversion.
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  // 0xCAFE0000 marker plus 16 time-derived bits: nonzero and hard to collide.
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {

    char *value = NULL; // Actual value of the environment variable.

    // Set environment variable, but do not overwrite if it already exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // Check the variable is written.
    value = __kmp_env_get(name);
    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {

      done = 1; // Ok, environment variable set successfully, exit the loop.

    } else {

      // Oops. Write failed. Another copy of OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      // Parse "<addr>-<val>-<file>"; each split leaves the remainder in tail,
      // so a malformed value yields tail == NULL and neighbor stays unknown.
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        long *flag_addr = 0;
        long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void**, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // First, check whether environment-encoded address is mapped into
          // addr space.
          // If so, dereference it to see if it still has the right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // If not, then we know the other copy of the library is no longer
            // running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume it is the incompatible format of future version of the
        // library. Assume the other library is alive.
        // WARN( ... ); // TODO: Issue a warning.
        file_name = "unknown library";
        KMP_FALLTHROUGH();
      // Attention! Falling to the next case. That's intentional.
      case 1: { // Neighbor is alive.
        // Check it is allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.
        // Clear the variable and try to register library again.
        __kmp_env_unset(name);
      } break;
      default: { KMP_DEBUG_ASSERT(0); } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
  }
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup

/* Undo __kmp_register_library_startup(): remove the env var (only if it still
   holds OUR registration string -- another copy may have re-registered) and
   release the registration state. */
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable. Delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;

} // __kmp_unregister_library

// End of Library registration stuff.
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

// Detect which Intel MIC (Xeon Phi) generation we are running on by probing
// CPUID leaf 1 (family/model/stepping in EAX) and set the global
// __kmp_mic_type accordingly (mic2 = KNC, mic3, or non_mic).
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  // NOTE(review): the masks below select model/family bits of EAX; the exact
  // signature-to-product mapping is asserted here, not derivable from SOURCE.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  int size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

#if OMPT_SUPPORT
  ompt_pre_init();
#endif

  __kmp_validate_locks();

  /* Initialize
internal memory allocator */ 64950b57cec5SDimitry Andric __kmp_init_allocator(); 64960b57cec5SDimitry Andric 64970b57cec5SDimitry Andric /* Register the library startup via an environment variable and check to see 64980b57cec5SDimitry Andric whether another copy of the library is already registered. */ 64990b57cec5SDimitry Andric 65000b57cec5SDimitry Andric __kmp_register_library_startup(); 65010b57cec5SDimitry Andric 65020b57cec5SDimitry Andric /* TODO reinitialization of library */ 65030b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 65040b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); 65050b57cec5SDimitry Andric } 65060b57cec5SDimitry Andric 65070b57cec5SDimitry Andric __kmp_global.g.g_abort = 0; 65080b57cec5SDimitry Andric TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 65090b57cec5SDimitry Andric 65100b57cec5SDimitry Andric /* initialize the locks */ 65110b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 65120b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 65130b57cec5SDimitry Andric __kmp_init_speculative_stats(); 65140b57cec5SDimitry Andric #endif 65150b57cec5SDimitry Andric #endif 65160b57cec5SDimitry Andric #if KMP_STATS_ENABLED 65170b57cec5SDimitry Andric __kmp_stats_init(); 65180b57cec5SDimitry Andric #endif 65190b57cec5SDimitry Andric __kmp_init_lock(&__kmp_global_lock); 65200b57cec5SDimitry Andric __kmp_init_queuing_lock(&__kmp_dispatch_lock); 65210b57cec5SDimitry Andric __kmp_init_lock(&__kmp_debug_lock); 65220b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock); 65230b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); 65240b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); 65250b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); 65260b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); 65270b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); 65280b57cec5SDimitry Andric 
__kmp_init_atomic_lock(&__kmp_atomic_lock_8r); 65290b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); 65300b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); 65310b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); 65320b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); 65330b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); 65340b57cec5SDimitry Andric __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); 65350b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); 65360b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_exit_lock); 65370b57cec5SDimitry Andric #if KMP_USE_MONITOR 65380b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_monitor_lock); 65390b57cec5SDimitry Andric #endif 65400b57cec5SDimitry Andric __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); 65410b57cec5SDimitry Andric 65420b57cec5SDimitry Andric /* conduct initialization and initial setup of configuration */ 65430b57cec5SDimitry Andric 65440b57cec5SDimitry Andric __kmp_runtime_initialize(); 65450b57cec5SDimitry Andric 65460b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 65470b57cec5SDimitry Andric __kmp_check_mic_type(); 65480b57cec5SDimitry Andric #endif 65490b57cec5SDimitry Andric 65500b57cec5SDimitry Andric // Some global variable initialization moved here from kmp_env_initialize() 65510b57cec5SDimitry Andric #ifdef KMP_DEBUG 65520b57cec5SDimitry Andric kmp_diag = 0; 65530b57cec5SDimitry Andric #endif 65540b57cec5SDimitry Andric __kmp_abort_delay = 0; 65550b57cec5SDimitry Andric 65560b57cec5SDimitry Andric // From __kmp_init_dflt_team_nth() 65570b57cec5SDimitry Andric /* assume the entire machine will be used */ 65580b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_xproc; 65590b57cec5SDimitry Andric if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { 65600b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = KMP_MIN_NTH; 65610b57cec5SDimitry Andric } 65620b57cec5SDimitry Andric if 
(__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { 65630b57cec5SDimitry Andric __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 65640b57cec5SDimitry Andric } 65650b57cec5SDimitry Andric __kmp_max_nth = __kmp_sys_max_nth; 65660b57cec5SDimitry Andric __kmp_cg_max_nth = __kmp_sys_max_nth; 65670b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default 65680b57cec5SDimitry Andric if (__kmp_teams_max_nth > __kmp_sys_max_nth) { 65690b57cec5SDimitry Andric __kmp_teams_max_nth = __kmp_sys_max_nth; 65700b57cec5SDimitry Andric } 65710b57cec5SDimitry Andric 65720b57cec5SDimitry Andric // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" 65730b57cec5SDimitry Andric // part 65740b57cec5SDimitry Andric __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 65750b57cec5SDimitry Andric #if KMP_USE_MONITOR 65760b57cec5SDimitry Andric __kmp_monitor_wakeups = 65770b57cec5SDimitry Andric KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 65780b57cec5SDimitry Andric __kmp_bt_intervals = 65790b57cec5SDimitry Andric KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 65800b57cec5SDimitry Andric #endif 65810b57cec5SDimitry Andric // From "KMP_LIBRARY" part of __kmp_env_initialize() 65820b57cec5SDimitry Andric __kmp_library = library_throughput; 65830b57cec5SDimitry Andric // From KMP_SCHEDULE initialization 65840b57cec5SDimitry Andric __kmp_static = kmp_sch_static_balanced; 65850b57cec5SDimitry Andric // AC: do not use analytical here, because it is non-monotonous 65860b57cec5SDimitry Andric //__kmp_guided = kmp_sch_guided_iterative_chunked; 65870b57cec5SDimitry Andric //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no 65880b57cec5SDimitry Andric // need to repeat assignment 65890b57cec5SDimitry Andric // Barrier initialization. 
Moved here from __kmp_env_initialize() Barrier branch 65900b57cec5SDimitry Andric // bit control and barrier method control parts 65910b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 65920b57cec5SDimitry Andric #define kmp_reduction_barrier_gather_bb ((int)1) 65930b57cec5SDimitry Andric #define kmp_reduction_barrier_release_bb ((int)1) 65940b57cec5SDimitry Andric #define kmp_reduction_barrier_gather_pat bp_hyper_bar 65950b57cec5SDimitry Andric #define kmp_reduction_barrier_release_pat bp_hyper_bar 65960b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 65970b57cec5SDimitry Andric for (i = bs_plain_barrier; i < bs_last_barrier; i++) { 65980b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; 65990b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; 66000b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; 66010b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; 66020b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 66030b57cec5SDimitry Andric if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( 66040b57cec5SDimitry Andric // lin_64 ): hyper,1 66050b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; 66060b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; 66070b57cec5SDimitry Andric __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; 66080b57cec5SDimitry Andric __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat; 66090b57cec5SDimitry Andric } 66100b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 66110b57cec5SDimitry Andric } 66120b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 66130b57cec5SDimitry Andric #undef kmp_reduction_barrier_release_pat 66140b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_pat 66150b57cec5SDimitry Andric #undef 
kmp_reduction_barrier_release_bb 66160b57cec5SDimitry Andric #undef kmp_reduction_barrier_gather_bb 66170b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 66180b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 66190b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 66200b57cec5SDimitry Andric // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 66210b57cec5SDimitry Andric __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather 66220b57cec5SDimitry Andric __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = 66230b57cec5SDimitry Andric 1; // forkjoin release 66240b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 66250b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 66260b57cec5SDimitry Andric } 66270b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 66280b57cec5SDimitry Andric if (__kmp_mic_type == mic2) { // KNC 66290b57cec5SDimitry Andric __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 66300b57cec5SDimitry Andric __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 66310b57cec5SDimitry Andric } 66320b57cec5SDimitry Andric #endif // KMP_FAST_REDUCTION_BARRIER 66330b57cec5SDimitry Andric #endif // KMP_MIC_SUPPORTED 66340b57cec5SDimitry Andric 66350b57cec5SDimitry Andric // From KMP_CHECKS initialization 66360b57cec5SDimitry Andric #ifdef KMP_DEBUG 66370b57cec5SDimitry Andric __kmp_env_checks = TRUE; /* development versions have the extra checks */ 66380b57cec5SDimitry Andric #else 66390b57cec5SDimitry Andric __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ 66400b57cec5SDimitry Andric #endif 66410b57cec5SDimitry Andric 66420b57cec5SDimitry Andric // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 66430b57cec5SDimitry Andric __kmp_foreign_tp = TRUE; 66440b57cec5SDimitry Andric 66450b57cec5SDimitry Andric __kmp_global.g.g_dynamic = FALSE; 
66460b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_default; 66470b57cec5SDimitry Andric 66480b57cec5SDimitry Andric __kmp_env_initialize(NULL); 66490b57cec5SDimitry Andric 66500b57cec5SDimitry Andric // Print all messages in message catalog for testing purposes. 66510b57cec5SDimitry Andric #ifdef KMP_DEBUG 66520b57cec5SDimitry Andric char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); 66530b57cec5SDimitry Andric if (__kmp_str_match_true(val)) { 66540b57cec5SDimitry Andric kmp_str_buf_t buffer; 66550b57cec5SDimitry Andric __kmp_str_buf_init(&buffer); 66560b57cec5SDimitry Andric __kmp_i18n_dump_catalog(&buffer); 66570b57cec5SDimitry Andric __kmp_printf("%s", buffer.str); 66580b57cec5SDimitry Andric __kmp_str_buf_free(&buffer); 66590b57cec5SDimitry Andric } 66600b57cec5SDimitry Andric __kmp_env_free(&val); 66610b57cec5SDimitry Andric #endif 66620b57cec5SDimitry Andric 66630b57cec5SDimitry Andric __kmp_threads_capacity = 66640b57cec5SDimitry Andric __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); 66650b57cec5SDimitry Andric // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 66660b57cec5SDimitry Andric __kmp_tp_capacity = __kmp_default_tp_capacity( 66670b57cec5SDimitry Andric __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 66680b57cec5SDimitry Andric 66690b57cec5SDimitry Andric // If the library is shut down properly, both pools must be NULL. Just in 66700b57cec5SDimitry Andric // case, set them to NULL -- some memory may leak, but subsequent code will 66710b57cec5SDimitry Andric // work even if pools are not freed. 
66720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); 66730b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); 66740b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); 66750b57cec5SDimitry Andric __kmp_thread_pool = NULL; 66760b57cec5SDimitry Andric __kmp_thread_pool_insert_pt = NULL; 66770b57cec5SDimitry Andric __kmp_team_pool = NULL; 66780b57cec5SDimitry Andric 66790b57cec5SDimitry Andric /* Allocate all of the variable sized records */ 66800b57cec5SDimitry Andric /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are 66810b57cec5SDimitry Andric * expandable */ 66820b57cec5SDimitry Andric /* Since allocation is cache-aligned, just add extra padding at the end */ 66830b57cec5SDimitry Andric size = 66840b57cec5SDimitry Andric (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + 66850b57cec5SDimitry Andric CACHE_LINE; 66860b57cec5SDimitry Andric __kmp_threads = (kmp_info_t **)__kmp_allocate(size); 66870b57cec5SDimitry Andric __kmp_root = (kmp_root_t **)((char *)__kmp_threads + 66880b57cec5SDimitry Andric sizeof(kmp_info_t *) * __kmp_threads_capacity); 66890b57cec5SDimitry Andric 66900b57cec5SDimitry Andric /* init thread counts */ 66910b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_all_nth == 66920b57cec5SDimitry Andric 0); // Asserts fail if the library is reinitializing and 66930b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. 
66940b57cec5SDimitry Andric __kmp_all_nth = 0; 66950b57cec5SDimitry Andric __kmp_nth = 0; 66960b57cec5SDimitry Andric 66970b57cec5SDimitry Andric /* setup the uber master thread and hierarchy */ 66980b57cec5SDimitry Andric gtid = __kmp_register_root(TRUE); 66990b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); 67000b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 67010b57cec5SDimitry Andric KMP_ASSERT(KMP_INITIAL_GTID(gtid)); 67020b57cec5SDimitry Andric 67030b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. */ 67040b57cec5SDimitry Andric 67050b57cec5SDimitry Andric __kmp_common_initialize(); 67060b57cec5SDimitry Andric 67070b57cec5SDimitry Andric #if KMP_OS_UNIX 67080b57cec5SDimitry Andric /* invoke the child fork handler */ 67090b57cec5SDimitry Andric __kmp_register_atfork(); 67100b57cec5SDimitry Andric #endif 67110b57cec5SDimitry Andric 67120b57cec5SDimitry Andric #if !KMP_DYNAMIC_LIB 67130b57cec5SDimitry Andric { 67140b57cec5SDimitry Andric /* Invoke the exit handler when the program finishes, only for static 67150b57cec5SDimitry Andric library. For dynamic library, we already have _fini and DllMain. */ 67160b57cec5SDimitry Andric int rc = atexit(__kmp_internal_end_atexit); 67170b57cec5SDimitry Andric if (rc != 0) { 67180b57cec5SDimitry Andric __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), 67190b57cec5SDimitry Andric __kmp_msg_null); 67200b57cec5SDimitry Andric } 67210b57cec5SDimitry Andric } 67220b57cec5SDimitry Andric #endif 67230b57cec5SDimitry Andric 67240b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 67250b57cec5SDimitry Andric #if KMP_OS_UNIX 67260b57cec5SDimitry Andric /* NOTE: make sure that this is called before the user installs their own 67270b57cec5SDimitry Andric signal handlers so that the user handlers are called first. 
this way they 67280b57cec5SDimitry Andric can return false, not call our handler, avoid terminating the library, and 67290b57cec5SDimitry Andric continue execution where they left off. */ 67300b57cec5SDimitry Andric __kmp_install_signals(FALSE); 67310b57cec5SDimitry Andric #endif /* KMP_OS_UNIX */ 67320b57cec5SDimitry Andric #if KMP_OS_WINDOWS 67330b57cec5SDimitry Andric __kmp_install_signals(TRUE); 67340b57cec5SDimitry Andric #endif /* KMP_OS_WINDOWS */ 67350b57cec5SDimitry Andric #endif 67360b57cec5SDimitry Andric 67370b57cec5SDimitry Andric /* we have finished the serial initialization */ 67380b57cec5SDimitry Andric __kmp_init_counter++; 67390b57cec5SDimitry Andric 67400b57cec5SDimitry Andric __kmp_init_serial = TRUE; 67410b57cec5SDimitry Andric 67420b57cec5SDimitry Andric if (__kmp_settings) { 67430b57cec5SDimitry Andric __kmp_env_print(); 67440b57cec5SDimitry Andric } 67450b57cec5SDimitry Andric 67460b57cec5SDimitry Andric if (__kmp_display_env || __kmp_display_env_verbose) { 67470b57cec5SDimitry Andric __kmp_env_print_2(); 67480b57cec5SDimitry Andric } 67490b57cec5SDimitry Andric 67500b57cec5SDimitry Andric #if OMPT_SUPPORT 67510b57cec5SDimitry Andric ompt_post_init(); 67520b57cec5SDimitry Andric #endif 67530b57cec5SDimitry Andric 67540b57cec5SDimitry Andric KMP_MB(); 67550b57cec5SDimitry Andric 67560b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n")); 67570b57cec5SDimitry Andric } 67580b57cec5SDimitry Andric 67590b57cec5SDimitry Andric void __kmp_serial_initialize(void) { 67600b57cec5SDimitry Andric if (__kmp_init_serial) { 67610b57cec5SDimitry Andric return; 67620b57cec5SDimitry Andric } 67630b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 67640b57cec5SDimitry Andric if (__kmp_init_serial) { 67650b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 67660b57cec5SDimitry Andric return; 67670b57cec5SDimitry Andric } 67680b57cec5SDimitry Andric __kmp_do_serial_initialize(); 67690b57cec5SDimitry 
Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 67700b57cec5SDimitry Andric } 67710b57cec5SDimitry Andric 67720b57cec5SDimitry Andric static void __kmp_do_middle_initialize(void) { 67730b57cec5SDimitry Andric int i, j; 67740b57cec5SDimitry Andric int prev_dflt_team_nth; 67750b57cec5SDimitry Andric 67760b57cec5SDimitry Andric if (!__kmp_init_serial) { 67770b57cec5SDimitry Andric __kmp_do_serial_initialize(); 67780b57cec5SDimitry Andric } 67790b57cec5SDimitry Andric 67800b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 67810b57cec5SDimitry Andric 67820b57cec5SDimitry Andric // Save the previous value for the __kmp_dflt_team_nth so that 67830b57cec5SDimitry Andric // we can avoid some reinitialization if it hasn't changed. 67840b57cec5SDimitry Andric prev_dflt_team_nth = __kmp_dflt_team_nth; 67850b57cec5SDimitry Andric 67860b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 67870b57cec5SDimitry Andric // __kmp_affinity_initialize() will try to set __kmp_ncores to the 67880b57cec5SDimitry Andric // number of cores on the machine. 67890b57cec5SDimitry Andric __kmp_affinity_initialize(); 67900b57cec5SDimitry Andric 67910b57cec5SDimitry Andric // Run through the __kmp_threads array and set the affinity mask 67920b57cec5SDimitry Andric // for each root thread that is currently registered with the RTL. 
67930b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 67940b57cec5SDimitry Andric if (TCR_PTR(__kmp_threads[i]) != NULL) { 67950b57cec5SDimitry Andric __kmp_affinity_set_init_mask(i, TRUE); 67960b57cec5SDimitry Andric } 67970b57cec5SDimitry Andric } 67980b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 67990b57cec5SDimitry Andric 68000b57cec5SDimitry Andric KMP_ASSERT(__kmp_xproc > 0); 68010b57cec5SDimitry Andric if (__kmp_avail_proc == 0) { 68020b57cec5SDimitry Andric __kmp_avail_proc = __kmp_xproc; 68030b57cec5SDimitry Andric } 68040b57cec5SDimitry Andric 68050b57cec5SDimitry Andric // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), 68060b57cec5SDimitry Andric // correct them now 68070b57cec5SDimitry Andric j = 0; 68080b57cec5SDimitry Andric while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { 68090b57cec5SDimitry Andric __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = 68100b57cec5SDimitry Andric __kmp_avail_proc; 68110b57cec5SDimitry Andric j++; 68120b57cec5SDimitry Andric } 68130b57cec5SDimitry Andric 68140b57cec5SDimitry Andric if (__kmp_dflt_team_nth == 0) { 68150b57cec5SDimitry Andric #ifdef KMP_DFLT_NTH_CORES 68160b57cec5SDimitry Andric // Default #threads = #cores 68170b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_ncores; 68180b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 68190b57cec5SDimitry Andric "__kmp_ncores (%d)\n", 68200b57cec5SDimitry Andric __kmp_dflt_team_nth)); 68210b57cec5SDimitry Andric #else 68220b57cec5SDimitry Andric // Default #threads = #available OS procs 68230b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_avail_proc; 68240b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 68250b57cec5SDimitry Andric "__kmp_avail_proc(%d)\n", 68260b57cec5SDimitry Andric __kmp_dflt_team_nth)); 68270b57cec5SDimitry Andric #endif /* KMP_DFLT_NTH_CORES */ 
68280b57cec5SDimitry Andric } 68290b57cec5SDimitry Andric 68300b57cec5SDimitry Andric if (__kmp_dflt_team_nth < KMP_MIN_NTH) { 68310b57cec5SDimitry Andric __kmp_dflt_team_nth = KMP_MIN_NTH; 68320b57cec5SDimitry Andric } 68330b57cec5SDimitry Andric if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { 68340b57cec5SDimitry Andric __kmp_dflt_team_nth = __kmp_sys_max_nth; 68350b57cec5SDimitry Andric } 68360b57cec5SDimitry Andric 68370b57cec5SDimitry Andric // There's no harm in continuing if the following check fails, 68380b57cec5SDimitry Andric // but it indicates an error in the previous logic. 68390b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); 68400b57cec5SDimitry Andric 68410b57cec5SDimitry Andric if (__kmp_dflt_team_nth != prev_dflt_team_nth) { 68420b57cec5SDimitry Andric // Run through the __kmp_threads array and set the num threads icv for each 68430b57cec5SDimitry Andric // root thread that is currently registered with the RTL (which has not 68440b57cec5SDimitry Andric // already explicitly set its nthreads-var with a call to 68450b57cec5SDimitry Andric // omp_set_num_threads()). 
68460b57cec5SDimitry Andric for (i = 0; i < __kmp_threads_capacity; i++) { 68470b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[i]; 68480b57cec5SDimitry Andric if (thread == NULL) 68490b57cec5SDimitry Andric continue; 68500b57cec5SDimitry Andric if (thread->th.th_current_task->td_icvs.nproc != 0) 68510b57cec5SDimitry Andric continue; 68520b57cec5SDimitry Andric 68530b57cec5SDimitry Andric set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); 68540b57cec5SDimitry Andric } 68550b57cec5SDimitry Andric } 68560b57cec5SDimitry Andric KA_TRACE( 68570b57cec5SDimitry Andric 20, 68580b57cec5SDimitry Andric ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 68590b57cec5SDimitry Andric __kmp_dflt_team_nth)); 68600b57cec5SDimitry Andric 68610b57cec5SDimitry Andric #ifdef KMP_ADJUST_BLOCKTIME 68620b57cec5SDimitry Andric /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ 68630b57cec5SDimitry Andric if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 68640b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 68650b57cec5SDimitry Andric if (__kmp_nth > __kmp_avail_proc) { 68660b57cec5SDimitry Andric __kmp_zero_bt = TRUE; 68670b57cec5SDimitry Andric } 68680b57cec5SDimitry Andric } 68690b57cec5SDimitry Andric #endif /* KMP_ADJUST_BLOCKTIME */ 68700b57cec5SDimitry Andric 68710b57cec5SDimitry Andric /* we have finished middle initialization */ 68720b57cec5SDimitry Andric TCW_SYNC_4(__kmp_init_middle, TRUE); 68730b57cec5SDimitry Andric 68740b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); 68750b57cec5SDimitry Andric } 68760b57cec5SDimitry Andric 68770b57cec5SDimitry Andric void __kmp_middle_initialize(void) { 68780b57cec5SDimitry Andric if (__kmp_init_middle) { 68790b57cec5SDimitry Andric return; 68800b57cec5SDimitry Andric } 68810b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 68820b57cec5SDimitry Andric if (__kmp_init_middle) { 68830b57cec5SDimitry Andric 
__kmp_release_bootstrap_lock(&__kmp_initz_lock); 68840b57cec5SDimitry Andric return; 68850b57cec5SDimitry Andric } 68860b57cec5SDimitry Andric __kmp_do_middle_initialize(); 68870b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 68880b57cec5SDimitry Andric } 68890b57cec5SDimitry Andric 68900b57cec5SDimitry Andric void __kmp_parallel_initialize(void) { 68910b57cec5SDimitry Andric int gtid = __kmp_entry_gtid(); // this might be a new root 68920b57cec5SDimitry Andric 68930b57cec5SDimitry Andric /* synchronize parallel initialization (for sibling) */ 68940b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) 68950b57cec5SDimitry Andric return; 68960b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 68970b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) { 68980b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 68990b57cec5SDimitry Andric return; 69000b57cec5SDimitry Andric } 69010b57cec5SDimitry Andric 69020b57cec5SDimitry Andric /* TODO reinitialization after we have already shut down */ 69030b57cec5SDimitry Andric if (TCR_4(__kmp_global.g.g_done)) { 69040b57cec5SDimitry Andric KA_TRACE( 69050b57cec5SDimitry Andric 10, 69060b57cec5SDimitry Andric ("__kmp_parallel_initialize: attempt to init while shutting down\n")); 69070b57cec5SDimitry Andric __kmp_infinite_loop(); 69080b57cec5SDimitry Andric } 69090b57cec5SDimitry Andric 69100b57cec5SDimitry Andric /* jc: The lock __kmp_initz_lock is already held, so calling 69110b57cec5SDimitry Andric __kmp_serial_initialize would cause a deadlock. So we call 69120b57cec5SDimitry Andric __kmp_do_serial_initialize directly. 
*/ 69130b57cec5SDimitry Andric if (!__kmp_init_middle) { 69140b57cec5SDimitry Andric __kmp_do_middle_initialize(); 69150b57cec5SDimitry Andric } 69160b57cec5SDimitry Andric __kmp_resume_if_hard_paused(); 69170b57cec5SDimitry Andric 69180b57cec5SDimitry Andric /* begin initialization */ 69190b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); 69200b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 69210b57cec5SDimitry Andric 69220b57cec5SDimitry Andric #if KMP_ARCH_X86 || KMP_ARCH_X86_64 69230b57cec5SDimitry Andric // Save the FP control regs. 69240b57cec5SDimitry Andric // Worker threads will set theirs to these values at thread startup. 69250b57cec5SDimitry Andric __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); 69260b57cec5SDimitry Andric __kmp_store_mxcsr(&__kmp_init_mxcsr); 69270b57cec5SDimitry Andric __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 69280b57cec5SDimitry Andric #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 69290b57cec5SDimitry Andric 69300b57cec5SDimitry Andric #if KMP_OS_UNIX 69310b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 69320b57cec5SDimitry Andric /* must be after __kmp_serial_initialize */ 69330b57cec5SDimitry Andric __kmp_install_signals(TRUE); 69340b57cec5SDimitry Andric #endif 69350b57cec5SDimitry Andric #endif 69360b57cec5SDimitry Andric 69370b57cec5SDimitry Andric __kmp_suspend_initialize(); 69380b57cec5SDimitry Andric 69390b57cec5SDimitry Andric #if defined(USE_LOAD_BALANCE) 69400b57cec5SDimitry Andric if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 69410b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 69420b57cec5SDimitry Andric } 69430b57cec5SDimitry Andric #else 69440b57cec5SDimitry Andric if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 69450b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 69460b57cec5SDimitry Andric } 69470b57cec5SDimitry Andric #endif 69480b57cec5SDimitry Andric 69490b57cec5SDimitry Andric if 
(__kmp_version) { 69500b57cec5SDimitry Andric __kmp_print_version_2(); 69510b57cec5SDimitry Andric } 69520b57cec5SDimitry Andric 69530b57cec5SDimitry Andric /* we have finished parallel initialization */ 69540b57cec5SDimitry Andric TCW_SYNC_4(__kmp_init_parallel, TRUE); 69550b57cec5SDimitry Andric 69560b57cec5SDimitry Andric KMP_MB(); 69570b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); 69580b57cec5SDimitry Andric 69590b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 69600b57cec5SDimitry Andric } 69610b57cec5SDimitry Andric 69620b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 69630b57cec5SDimitry Andric 69640b57cec5SDimitry Andric void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 69650b57cec5SDimitry Andric kmp_team_t *team) { 69660b57cec5SDimitry Andric kmp_disp_t *dispatch; 69670b57cec5SDimitry Andric 69680b57cec5SDimitry Andric KMP_MB(); 69690b57cec5SDimitry Andric 69700b57cec5SDimitry Andric /* none of the threads have encountered any constructs, yet. 
*/ 69710b57cec5SDimitry Andric this_thr->th.th_local.this_construct = 0; 69720b57cec5SDimitry Andric #if KMP_CACHE_MANAGE 69730b57cec5SDimitry Andric KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); 69740b57cec5SDimitry Andric #endif /* KMP_CACHE_MANAGE */ 69750b57cec5SDimitry Andric dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 69760b57cec5SDimitry Andric KMP_DEBUG_ASSERT(dispatch); 69770b57cec5SDimitry Andric KMP_DEBUG_ASSERT(team->t.t_dispatch); 69780b57cec5SDimitry Andric // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ 69790b57cec5SDimitry Andric // this_thr->th.th_info.ds.ds_tid ] ); 69800b57cec5SDimitry Andric 69810b57cec5SDimitry Andric dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 69820b57cec5SDimitry Andric dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter 69830b57cec5SDimitry Andric if (__kmp_env_consistency_check) 69840b57cec5SDimitry Andric __kmp_push_parallel(gtid, team->t.t_ident); 69850b57cec5SDimitry Andric 69860b57cec5SDimitry Andric KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 69870b57cec5SDimitry Andric } 69880b57cec5SDimitry Andric 69890b57cec5SDimitry Andric void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 69900b57cec5SDimitry Andric kmp_team_t *team) { 69910b57cec5SDimitry Andric if (__kmp_env_consistency_check) 69920b57cec5SDimitry Andric __kmp_pop_parallel(gtid, team->t.t_ident); 69930b57cec5SDimitry Andric 69940b57cec5SDimitry Andric __kmp_finish_implicit_task(this_thr); 69950b57cec5SDimitry Andric } 69960b57cec5SDimitry Andric 69970b57cec5SDimitry Andric int __kmp_invoke_task_func(int gtid) { 69980b57cec5SDimitry Andric int rc; 69990b57cec5SDimitry Andric int tid = __kmp_tid_from_gtid(gtid); 70000b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 70010b57cec5SDimitry Andric kmp_team_t *team = this_thr->th.th_team; 70020b57cec5SDimitry Andric 70030b57cec5SDimitry Andric __kmp_run_before_invoked_task(gtid, tid, this_thr, team); 70040b57cec5SDimitry Andric #if USE_ITT_BUILD 70050b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 70060b57cec5SDimitry Andric __kmp_itt_stack_callee_enter( 70070b57cec5SDimitry Andric (__itt_caller) 70080b57cec5SDimitry Andric team->t.t_stack_id); // inform ittnotify about entering user's code 70090b57cec5SDimitry Andric } 70100b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 70110b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 70120b57cec5SDimitry Andric SSC_MARK_INVOKING(); 70130b57cec5SDimitry Andric #endif 70140b57cec5SDimitry Andric 70150b57cec5SDimitry Andric #if OMPT_SUPPORT 70160b57cec5SDimitry Andric void *dummy; 7017489b1cf2SDimitry Andric void **exit_frame_p; 70180b57cec5SDimitry Andric ompt_data_t *my_task_data; 70190b57cec5SDimitry Andric ompt_data_t *my_parallel_data; 70200b57cec5SDimitry Andric int ompt_team_size; 70210b57cec5SDimitry Andric 70220b57cec5SDimitry Andric if (ompt_enabled.enabled) { 7023489b1cf2SDimitry Andric exit_frame_p = &( 70240b57cec5SDimitry Andric 
team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr); 70250b57cec5SDimitry Andric } else { 7026489b1cf2SDimitry Andric exit_frame_p = &dummy; 70270b57cec5SDimitry Andric } 70280b57cec5SDimitry Andric 70290b57cec5SDimitry Andric my_task_data = 70300b57cec5SDimitry Andric &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data); 70310b57cec5SDimitry Andric my_parallel_data = &(team->t.ompt_team_info.parallel_data); 70320b57cec5SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 70330b57cec5SDimitry Andric ompt_team_size = team->t.t_nproc; 70340b57cec5SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 70350b57cec5SDimitry Andric ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, 7036489b1cf2SDimitry Andric __kmp_tid_from_gtid(gtid), ompt_task_implicit); 70370b57cec5SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); 70380b57cec5SDimitry Andric } 70390b57cec5SDimitry Andric #endif 70400b57cec5SDimitry Andric 70410b57cec5SDimitry Andric #if KMP_STATS_ENABLED 70420b57cec5SDimitry Andric stats_state_e previous_state = KMP_GET_THREAD_STATE(); 70430b57cec5SDimitry Andric if (previous_state == stats_state_e::TEAMS_REGION) { 70440b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_teams); 70450b57cec5SDimitry Andric } else { 70460b57cec5SDimitry Andric KMP_PUSH_PARTITIONED_TIMER(OMP_parallel); 70470b57cec5SDimitry Andric } 70480b57cec5SDimitry Andric KMP_SET_THREAD_STATE(IMPLICIT_TASK); 70490b57cec5SDimitry Andric #endif 70500b57cec5SDimitry Andric 70510b57cec5SDimitry Andric rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, 70520b57cec5SDimitry Andric tid, (int)team->t.t_argc, (void **)team->t.t_argv 70530b57cec5SDimitry Andric #if OMPT_SUPPORT 70540b57cec5SDimitry Andric , 7055489b1cf2SDimitry Andric exit_frame_p 70560b57cec5SDimitry Andric #endif 70570b57cec5SDimitry Andric ); 70580b57cec5SDimitry Andric #if OMPT_SUPPORT 
7059489b1cf2SDimitry Andric *exit_frame_p = NULL; 7060489b1cf2SDimitry Andric this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team; 70610b57cec5SDimitry Andric #endif 70620b57cec5SDimitry Andric 70630b57cec5SDimitry Andric #if KMP_STATS_ENABLED 70640b57cec5SDimitry Andric if (previous_state == stats_state_e::TEAMS_REGION) { 70650b57cec5SDimitry Andric KMP_SET_THREAD_STATE(previous_state); 70660b57cec5SDimitry Andric } 70670b57cec5SDimitry Andric KMP_POP_PARTITIONED_TIMER(); 70680b57cec5SDimitry Andric #endif 70690b57cec5SDimitry Andric 70700b57cec5SDimitry Andric #if USE_ITT_BUILD 70710b57cec5SDimitry Andric if (__itt_stack_caller_create_ptr) { 70720b57cec5SDimitry Andric __kmp_itt_stack_callee_leave( 70730b57cec5SDimitry Andric (__itt_caller) 70740b57cec5SDimitry Andric team->t.t_stack_id); // inform ittnotify about leaving user's code 70750b57cec5SDimitry Andric } 70760b57cec5SDimitry Andric #endif /* USE_ITT_BUILD */ 70770b57cec5SDimitry Andric __kmp_run_after_invoked_task(gtid, tid, this_thr, team); 70780b57cec5SDimitry Andric 70790b57cec5SDimitry Andric return rc; 70800b57cec5SDimitry Andric } 70810b57cec5SDimitry Andric 70820b57cec5SDimitry Andric void __kmp_teams_master(int gtid) { 70830b57cec5SDimitry Andric // This routine is called by all master threads in teams construct 70840b57cec5SDimitry Andric kmp_info_t *thr = __kmp_threads[gtid]; 70850b57cec5SDimitry Andric kmp_team_t *team = thr->th.th_team; 70860b57cec5SDimitry Andric ident_t *loc = team->t.t_ident; 70870b57cec5SDimitry Andric thr->th.th_set_nproc = thr->th.th_teams_size.nth; 70880b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr->th.th_teams_microtask); 70890b57cec5SDimitry Andric KMP_DEBUG_ASSERT(thr->th.th_set_nproc); 70900b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, 70910b57cec5SDimitry Andric __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask)); 70920b57cec5SDimitry Andric 70930b57cec5SDimitry Andric // This thread is a new CG 
root. Set up the proper variables. 70940b57cec5SDimitry Andric kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t)); 70950b57cec5SDimitry Andric tmp->cg_root = thr; // Make thr the CG root 70960b57cec5SDimitry Andric // Init to thread limit that was stored when league masters were forked 70970b57cec5SDimitry Andric tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit; 70980b57cec5SDimitry Andric tmp->cg_nthreads = 1; // Init counter to one active thread, this one 70990b57cec5SDimitry Andric KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init" 71000b57cec5SDimitry Andric " cg_nthreads to 1\n", 71010b57cec5SDimitry Andric thr, tmp)); 71020b57cec5SDimitry Andric tmp->up = thr->th.th_cg_roots; 71030b57cec5SDimitry Andric thr->th.th_cg_roots = tmp; 71040b57cec5SDimitry Andric 71050b57cec5SDimitry Andric // Launch league of teams now, but not let workers execute 71060b57cec5SDimitry Andric // (they hang on fork barrier until next parallel) 71070b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 71080b57cec5SDimitry Andric SSC_MARK_FORKING(); 71090b57cec5SDimitry Andric #endif 71100b57cec5SDimitry Andric __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, 71110b57cec5SDimitry Andric (microtask_t)thr->th.th_teams_microtask, // "wrapped" task 71120b57cec5SDimitry Andric VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); 71130b57cec5SDimitry Andric #if INCLUDE_SSC_MARKS 71140b57cec5SDimitry Andric SSC_MARK_JOINING(); 71150b57cec5SDimitry Andric #endif 71160b57cec5SDimitry Andric // If the team size was reduced from the limit, set it to the new size 71170b57cec5SDimitry Andric if (thr->th.th_team_nproc < thr->th.th_teams_size.nth) 71180b57cec5SDimitry Andric thr->th.th_teams_size.nth = thr->th.th_team_nproc; 71190b57cec5SDimitry Andric // AC: last parameter "1" eliminates join barrier which won't work because 71200b57cec5SDimitry Andric // worker threads are in a fork barrier waiting for more parallel regions 
71210b57cec5SDimitry Andric __kmp_join_call(loc, gtid 71220b57cec5SDimitry Andric #if OMPT_SUPPORT 71230b57cec5SDimitry Andric , 71240b57cec5SDimitry Andric fork_context_intel 71250b57cec5SDimitry Andric #endif 71260b57cec5SDimitry Andric , 71270b57cec5SDimitry Andric 1); 71280b57cec5SDimitry Andric } 71290b57cec5SDimitry Andric 71300b57cec5SDimitry Andric int __kmp_invoke_teams_master(int gtid) { 71310b57cec5SDimitry Andric kmp_info_t *this_thr = __kmp_threads[gtid]; 71320b57cec5SDimitry Andric kmp_team_t *team = this_thr->th.th_team; 71330b57cec5SDimitry Andric #if KMP_DEBUG 71340b57cec5SDimitry Andric if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) 71350b57cec5SDimitry Andric KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == 71360b57cec5SDimitry Andric (void *)__kmp_teams_master); 71370b57cec5SDimitry Andric #endif 71380b57cec5SDimitry Andric __kmp_run_before_invoked_task(gtid, 0, this_thr, team); 7139489b1cf2SDimitry Andric #if OMPT_SUPPORT 7140489b1cf2SDimitry Andric int tid = __kmp_tid_from_gtid(gtid); 7141489b1cf2SDimitry Andric ompt_data_t *task_data = 7142489b1cf2SDimitry Andric &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data; 7143489b1cf2SDimitry Andric ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data; 7144489b1cf2SDimitry Andric if (ompt_enabled.ompt_callback_implicit_task) { 7145489b1cf2SDimitry Andric ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 7146489b1cf2SDimitry Andric ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid, 7147489b1cf2SDimitry Andric ompt_task_initial); 7148489b1cf2SDimitry Andric OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid; 7149489b1cf2SDimitry Andric } 7150489b1cf2SDimitry Andric #endif 71510b57cec5SDimitry Andric __kmp_teams_master(gtid); 7152489b1cf2SDimitry Andric #if OMPT_SUPPORT 7153489b1cf2SDimitry Andric this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league; 7154489b1cf2SDimitry Andric #endif 71550b57cec5SDimitry Andric 
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}

/* this sets the requested number of threads for the next parallel region
   encountered by this team. since this should be enclosed in the forkjoin
   critical section it should avoid race conditions with asymmetrical nested
   parallelism */

// Record the thread-count request for the next parallel region on this
// thread; a non-positive request is ignored (leaves th_set_nproc unchanged).
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0)
    num_teams = 1; // default number of teams is 1.
  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1; // warn only once per process
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
  if (num_threads == 0) {
    // No thread_limit clause: derive a default team size from available procs.
    num_threads = __kmp_avail_proc / num_teams;
    // adjust num_threads w/o warning as it is not user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size to exceed thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
  } else {
    // This thread will be the master of the league masters
    // Store new thread limit; old limit is saved in th_cg_roots list
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

/* Launch the worker threads into the microtask. */

// Called by the team master (asserted below): resets per-team construct
// state and dispatch buffers, then releases workers via the fork barrier.
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    // Single-thread team: only buffer 0 exists.
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

// Master-side join: waits at the join barrier for all workers, then fires
// the OMPT end-of-barrier / end-of-implicit-task callbacks when enabled.
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    // Only the master reports a return address for the sync-region callbacks.
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
73530b57cec5SDimitry Andric static int __kmp_active_hot_team_nproc(kmp_root_t *root) { 73540b57cec5SDimitry Andric int i; 73550b57cec5SDimitry Andric int retval; 73560b57cec5SDimitry Andric kmp_team_t *hot_team; 73570b57cec5SDimitry Andric 73580b57cec5SDimitry Andric if (root->r.r_active) { 73590b57cec5SDimitry Andric return 0; 73600b57cec5SDimitry Andric } 73610b57cec5SDimitry Andric hot_team = root->r.r_hot_team; 73620b57cec5SDimitry Andric if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { 73630b57cec5SDimitry Andric return hot_team->t.t_nproc - 1; // Don't count master thread 73640b57cec5SDimitry Andric } 73650b57cec5SDimitry Andric 73660b57cec5SDimitry Andric // Skip the master thread - it is accounted for elsewhere. 73670b57cec5SDimitry Andric retval = 0; 73680b57cec5SDimitry Andric for (i = 1; i < hot_team->t.t_nproc; i++) { 73690b57cec5SDimitry Andric if (hot_team->t.t_threads[i]->th.th_active) { 73700b57cec5SDimitry Andric retval++; 73710b57cec5SDimitry Andric } 73720b57cec5SDimitry Andric } 73730b57cec5SDimitry Andric return retval; 73740b57cec5SDimitry Andric } 73750b57cec5SDimitry Andric 73760b57cec5SDimitry Andric // Perform an automatic adjustment to the number of 73770b57cec5SDimitry Andric // threads used by the next parallel region. 
73780b57cec5SDimitry Andric static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) { 73790b57cec5SDimitry Andric int retval; 73800b57cec5SDimitry Andric int pool_active; 73810b57cec5SDimitry Andric int hot_team_active; 73820b57cec5SDimitry Andric int team_curr_active; 73830b57cec5SDimitry Andric int system_active; 73840b57cec5SDimitry Andric 73850b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, 73860b57cec5SDimitry Andric set_nproc)); 73870b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root); 73880b57cec5SDimitry Andric KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0] 73890b57cec5SDimitry Andric ->th.th_current_task->td_icvs.dynamic == TRUE); 73900b57cec5SDimitry Andric KMP_DEBUG_ASSERT(set_nproc > 1); 73910b57cec5SDimitry Andric 73920b57cec5SDimitry Andric if (set_nproc == 1) { 73930b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n")); 73940b57cec5SDimitry Andric return 1; 73950b57cec5SDimitry Andric } 73960b57cec5SDimitry Andric 73970b57cec5SDimitry Andric // Threads that are active in the thread pool, active in the hot team for this 73980b57cec5SDimitry Andric // particular root (if we are at the outer par level), and the currently 73990b57cec5SDimitry Andric // executing thread (to become the master) are available to add to the new 74000b57cec5SDimitry Andric // team, but are currently contributing to the system load, and must be 74010b57cec5SDimitry Andric // accounted for. 74020b57cec5SDimitry Andric pool_active = __kmp_thread_pool_active_nth; 74030b57cec5SDimitry Andric hot_team_active = __kmp_active_hot_team_nproc(root); 74040b57cec5SDimitry Andric team_curr_active = pool_active + hot_team_active + 1; 74050b57cec5SDimitry Andric 74060b57cec5SDimitry Andric // Check the system load. 
74070b57cec5SDimitry Andric system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active); 74080b57cec5SDimitry Andric KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d " 74090b57cec5SDimitry Andric "hot team active = %d\n", 74100b57cec5SDimitry Andric system_active, pool_active, hot_team_active)); 74110b57cec5SDimitry Andric 74120b57cec5SDimitry Andric if (system_active < 0) { 74130b57cec5SDimitry Andric // There was an error reading the necessary info from /proc, so use the 74140b57cec5SDimitry Andric // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode 74150b57cec5SDimitry Andric // = dynamic_thread_limit, we shouldn't wind up getting back here. 74160b57cec5SDimitry Andric __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 74170b57cec5SDimitry Andric KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit"); 74180b57cec5SDimitry Andric 74190b57cec5SDimitry Andric // Make this call behave like the thread limit algorithm. 74200b57cec5SDimitry Andric retval = __kmp_avail_proc - __kmp_nth + 74210b57cec5SDimitry Andric (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 74220b57cec5SDimitry Andric if (retval > set_nproc) { 74230b57cec5SDimitry Andric retval = set_nproc; 74240b57cec5SDimitry Andric } 74250b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 74260b57cec5SDimitry Andric retval = KMP_MIN_NTH; 74270b57cec5SDimitry Andric } 74280b57cec5SDimitry Andric 74290b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", 74300b57cec5SDimitry Andric retval)); 74310b57cec5SDimitry Andric return retval; 74320b57cec5SDimitry Andric } 74330b57cec5SDimitry Andric 74340b57cec5SDimitry Andric // There is a slight delay in the load balance algorithm in detecting new 74350b57cec5SDimitry Andric // running procs. 
The real system load at this instant should be at least as 74360b57cec5SDimitry Andric // large as the #active omp thread that are available to add to the team. 74370b57cec5SDimitry Andric if (system_active < team_curr_active) { 74380b57cec5SDimitry Andric system_active = team_curr_active; 74390b57cec5SDimitry Andric } 74400b57cec5SDimitry Andric retval = __kmp_avail_proc - system_active + team_curr_active; 74410b57cec5SDimitry Andric if (retval > set_nproc) { 74420b57cec5SDimitry Andric retval = set_nproc; 74430b57cec5SDimitry Andric } 74440b57cec5SDimitry Andric if (retval < KMP_MIN_NTH) { 74450b57cec5SDimitry Andric retval = KMP_MIN_NTH; 74460b57cec5SDimitry Andric } 74470b57cec5SDimitry Andric 74480b57cec5SDimitry Andric KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval)); 74490b57cec5SDimitry Andric return retval; 74500b57cec5SDimitry Andric } // __kmp_load_balance_nproc() 74510b57cec5SDimitry Andric 74520b57cec5SDimitry Andric #endif /* USE_LOAD_BALANCE */ 74530b57cec5SDimitry Andric 74540b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 74550b57cec5SDimitry Andric 74560b57cec5SDimitry Andric /* NOTE: this is called with the __kmp_init_lock held */ 74570b57cec5SDimitry Andric void __kmp_cleanup(void) { 74580b57cec5SDimitry Andric int f; 74590b57cec5SDimitry Andric 74600b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: enter\n")); 74610b57cec5SDimitry Andric 74620b57cec5SDimitry Andric if (TCR_4(__kmp_init_parallel)) { 74630b57cec5SDimitry Andric #if KMP_HANDLE_SIGNALS 74640b57cec5SDimitry Andric __kmp_remove_signals(); 74650b57cec5SDimitry Andric #endif 74660b57cec5SDimitry Andric TCW_4(__kmp_init_parallel, FALSE); 74670b57cec5SDimitry Andric } 74680b57cec5SDimitry Andric 74690b57cec5SDimitry Andric if (TCR_4(__kmp_init_middle)) { 74700b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 74710b57cec5SDimitry Andric __kmp_affinity_uninitialize(); 74720b57cec5SDimitry Andric #endif /* 
KMP_AFFINITY_SUPPORTED */ 74730b57cec5SDimitry Andric __kmp_cleanup_hierarchy(); 74740b57cec5SDimitry Andric TCW_4(__kmp_init_middle, FALSE); 74750b57cec5SDimitry Andric } 74760b57cec5SDimitry Andric 74770b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); 74780b57cec5SDimitry Andric 74790b57cec5SDimitry Andric if (__kmp_init_serial) { 74800b57cec5SDimitry Andric __kmp_runtime_destroy(); 74810b57cec5SDimitry Andric __kmp_init_serial = FALSE; 74820b57cec5SDimitry Andric } 74830b57cec5SDimitry Andric 74840b57cec5SDimitry Andric __kmp_cleanup_threadprivate_caches(); 74850b57cec5SDimitry Andric 74860b57cec5SDimitry Andric for (f = 0; f < __kmp_threads_capacity; f++) { 74870b57cec5SDimitry Andric if (__kmp_root[f] != NULL) { 74880b57cec5SDimitry Andric __kmp_free(__kmp_root[f]); 74890b57cec5SDimitry Andric __kmp_root[f] = NULL; 74900b57cec5SDimitry Andric } 74910b57cec5SDimitry Andric } 74920b57cec5SDimitry Andric __kmp_free(__kmp_threads); 74930b57cec5SDimitry Andric // __kmp_threads and __kmp_root were allocated at once, as single block, so 74940b57cec5SDimitry Andric // there is no need in freeing __kmp_root. 
74950b57cec5SDimitry Andric __kmp_threads = NULL; 74960b57cec5SDimitry Andric __kmp_root = NULL; 74970b57cec5SDimitry Andric __kmp_threads_capacity = 0; 74980b57cec5SDimitry Andric 74990b57cec5SDimitry Andric #if KMP_USE_DYNAMIC_LOCK 75000b57cec5SDimitry Andric __kmp_cleanup_indirect_user_locks(); 75010b57cec5SDimitry Andric #else 75020b57cec5SDimitry Andric __kmp_cleanup_user_locks(); 75030b57cec5SDimitry Andric #endif 75040b57cec5SDimitry Andric 75050b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 75060b57cec5SDimitry Andric KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file)); 75070b57cec5SDimitry Andric __kmp_cpuinfo_file = NULL; 75080b57cec5SDimitry Andric #endif /* KMP_AFFINITY_SUPPORTED */ 75090b57cec5SDimitry Andric 75100b57cec5SDimitry Andric #if KMP_USE_ADAPTIVE_LOCKS 75110b57cec5SDimitry Andric #if KMP_DEBUG_ADAPTIVE_LOCKS 75120b57cec5SDimitry Andric __kmp_print_speculative_stats(); 75130b57cec5SDimitry Andric #endif 75140b57cec5SDimitry Andric #endif 75150b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_nth.nth); 75160b57cec5SDimitry Andric __kmp_nested_nth.nth = NULL; 75170b57cec5SDimitry Andric __kmp_nested_nth.size = 0; 75180b57cec5SDimitry Andric __kmp_nested_nth.used = 0; 75190b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); 75200b57cec5SDimitry Andric __kmp_nested_proc_bind.bind_types = NULL; 75210b57cec5SDimitry Andric __kmp_nested_proc_bind.size = 0; 75220b57cec5SDimitry Andric __kmp_nested_proc_bind.used = 0; 75230b57cec5SDimitry Andric if (__kmp_affinity_format) { 75240b57cec5SDimitry Andric KMP_INTERNAL_FREE(__kmp_affinity_format); 75250b57cec5SDimitry Andric __kmp_affinity_format = NULL; 75260b57cec5SDimitry Andric } 75270b57cec5SDimitry Andric 75280b57cec5SDimitry Andric __kmp_i18n_catclose(); 75290b57cec5SDimitry Andric 75300b57cec5SDimitry Andric #if KMP_USE_HIER_SCHED 75310b57cec5SDimitry Andric __kmp_hier_scheds.deallocate(); 75320b57cec5SDimitry Andric #endif 75330b57cec5SDimitry Andric 
75340b57cec5SDimitry Andric #if KMP_STATS_ENABLED 75350b57cec5SDimitry Andric __kmp_stats_fini(); 75360b57cec5SDimitry Andric #endif 75370b57cec5SDimitry Andric 75380b57cec5SDimitry Andric KA_TRACE(10, ("__kmp_cleanup: exit\n")); 75390b57cec5SDimitry Andric } 75400b57cec5SDimitry Andric 75410b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 75420b57cec5SDimitry Andric 75430b57cec5SDimitry Andric int __kmp_ignore_mppbeg(void) { 75440b57cec5SDimitry Andric char *env; 75450b57cec5SDimitry Andric 75460b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 75470b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 75480b57cec5SDimitry Andric return FALSE; 75490b57cec5SDimitry Andric } 75500b57cec5SDimitry Andric // By default __kmpc_begin() is no-op. 75510b57cec5SDimitry Andric return TRUE; 75520b57cec5SDimitry Andric } 75530b57cec5SDimitry Andric 75540b57cec5SDimitry Andric int __kmp_ignore_mppend(void) { 75550b57cec5SDimitry Andric char *env; 75560b57cec5SDimitry Andric 75570b57cec5SDimitry Andric if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 75580b57cec5SDimitry Andric if (__kmp_str_match_false(env)) 75590b57cec5SDimitry Andric return FALSE; 75600b57cec5SDimitry Andric } 75610b57cec5SDimitry Andric // By default __kmpc_end() is no-op. 
75620b57cec5SDimitry Andric return TRUE; 75630b57cec5SDimitry Andric } 75640b57cec5SDimitry Andric 75650b57cec5SDimitry Andric void __kmp_internal_begin(void) { 75660b57cec5SDimitry Andric int gtid; 75670b57cec5SDimitry Andric kmp_root_t *root; 75680b57cec5SDimitry Andric 75690b57cec5SDimitry Andric /* this is a very important step as it will register new sibling threads 75700b57cec5SDimitry Andric and assign these new uber threads a new gtid */ 75710b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 75720b57cec5SDimitry Andric root = __kmp_threads[gtid]->th.th_root; 75730b57cec5SDimitry Andric KMP_ASSERT(KMP_UBER_GTID(gtid)); 75740b57cec5SDimitry Andric 75750b57cec5SDimitry Andric if (root->r.r_begin) 75760b57cec5SDimitry Andric return; 75770b57cec5SDimitry Andric __kmp_acquire_lock(&root->r.r_begin_lock, gtid); 75780b57cec5SDimitry Andric if (root->r.r_begin) { 75790b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 75800b57cec5SDimitry Andric return; 75810b57cec5SDimitry Andric } 75820b57cec5SDimitry Andric 75830b57cec5SDimitry Andric root->r.r_begin = TRUE; 75840b57cec5SDimitry Andric 75850b57cec5SDimitry Andric __kmp_release_lock(&root->r.r_begin_lock, gtid); 75860b57cec5SDimitry Andric } 75870b57cec5SDimitry Andric 75880b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 75890b57cec5SDimitry Andric 75900b57cec5SDimitry Andric void __kmp_user_set_library(enum library_type arg) { 75910b57cec5SDimitry Andric int gtid; 75920b57cec5SDimitry Andric kmp_root_t *root; 75930b57cec5SDimitry Andric kmp_info_t *thread; 75940b57cec5SDimitry Andric 75950b57cec5SDimitry Andric /* first, make sure we are initialized so we can get our gtid */ 75960b57cec5SDimitry Andric 75970b57cec5SDimitry Andric gtid = __kmp_entry_gtid(); 75980b57cec5SDimitry Andric thread = __kmp_threads[gtid]; 75990b57cec5SDimitry Andric 76000b57cec5SDimitry Andric root = thread->th.th_root; 76010b57cec5SDimitry Andric 
76020b57cec5SDimitry Andric KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, 76030b57cec5SDimitry Andric library_serial)); 76040b57cec5SDimitry Andric if (root->r.r_in_parallel) { /* Must be called in serial section of top-level 76050b57cec5SDimitry Andric thread */ 76060b57cec5SDimitry Andric KMP_WARNING(SetLibraryIncorrectCall); 76070b57cec5SDimitry Andric return; 76080b57cec5SDimitry Andric } 76090b57cec5SDimitry Andric 76100b57cec5SDimitry Andric switch (arg) { 76110b57cec5SDimitry Andric case library_serial: 76120b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 76130b57cec5SDimitry Andric set__nproc(thread, 1); 76140b57cec5SDimitry Andric break; 76150b57cec5SDimitry Andric case library_turnaround: 76160b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 76170b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 76180b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 76190b57cec5SDimitry Andric break; 76200b57cec5SDimitry Andric case library_throughput: 76210b57cec5SDimitry Andric thread->th.th_set_nproc = 0; 76220b57cec5SDimitry Andric set__nproc(thread, __kmp_dflt_team_nth ? 
__kmp_dflt_team_nth 76230b57cec5SDimitry Andric : __kmp_dflt_team_nth_ub); 76240b57cec5SDimitry Andric break; 76250b57cec5SDimitry Andric default: 76260b57cec5SDimitry Andric KMP_FATAL(UnknownLibraryType, arg); 76270b57cec5SDimitry Andric } 76280b57cec5SDimitry Andric 76290b57cec5SDimitry Andric __kmp_aux_set_library(arg); 76300b57cec5SDimitry Andric } 76310b57cec5SDimitry Andric 76320b57cec5SDimitry Andric void __kmp_aux_set_stacksize(size_t arg) { 76330b57cec5SDimitry Andric if (!__kmp_init_serial) 76340b57cec5SDimitry Andric __kmp_serial_initialize(); 76350b57cec5SDimitry Andric 76360b57cec5SDimitry Andric #if KMP_OS_DARWIN 76370b57cec5SDimitry Andric if (arg & (0x1000 - 1)) { 76380b57cec5SDimitry Andric arg &= ~(0x1000 - 1); 76390b57cec5SDimitry Andric if (arg + 0x1000) /* check for overflow if we round up */ 76400b57cec5SDimitry Andric arg += 0x1000; 76410b57cec5SDimitry Andric } 76420b57cec5SDimitry Andric #endif 76430b57cec5SDimitry Andric __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 76440b57cec5SDimitry Andric 76450b57cec5SDimitry Andric /* only change the default stacksize before the first parallel region */ 76460b57cec5SDimitry Andric if (!TCR_4(__kmp_init_parallel)) { 76470b57cec5SDimitry Andric size_t value = arg; /* argument is in bytes */ 76480b57cec5SDimitry Andric 76490b57cec5SDimitry Andric if (value < __kmp_sys_min_stksize) 76500b57cec5SDimitry Andric value = __kmp_sys_min_stksize; 76510b57cec5SDimitry Andric else if (value > KMP_MAX_STKSIZE) 76520b57cec5SDimitry Andric value = KMP_MAX_STKSIZE; 76530b57cec5SDimitry Andric 76540b57cec5SDimitry Andric __kmp_stksize = value; 76550b57cec5SDimitry Andric 76560b57cec5SDimitry Andric __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? 
*/ 76570b57cec5SDimitry Andric } 76580b57cec5SDimitry Andric 76590b57cec5SDimitry Andric __kmp_release_bootstrap_lock(&__kmp_initz_lock); 76600b57cec5SDimitry Andric } 76610b57cec5SDimitry Andric 76620b57cec5SDimitry Andric /* set the behaviour of the runtime library */ 76630b57cec5SDimitry Andric /* TODO this can cause some odd behaviour with sibling parallelism... */ 76640b57cec5SDimitry Andric void __kmp_aux_set_library(enum library_type arg) { 76650b57cec5SDimitry Andric __kmp_library = arg; 76660b57cec5SDimitry Andric 76670b57cec5SDimitry Andric switch (__kmp_library) { 76680b57cec5SDimitry Andric case library_serial: { 76690b57cec5SDimitry Andric KMP_INFORM(LibraryIsSerial); 76700b57cec5SDimitry Andric } break; 76710b57cec5SDimitry Andric case library_turnaround: 76720b57cec5SDimitry Andric if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set) 76730b57cec5SDimitry Andric __kmp_use_yield = 2; // only yield when oversubscribed 76740b57cec5SDimitry Andric break; 76750b57cec5SDimitry Andric case library_throughput: 76760b57cec5SDimitry Andric if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) 76770b57cec5SDimitry Andric __kmp_dflt_blocktime = 200; 76780b57cec5SDimitry Andric break; 76790b57cec5SDimitry Andric default: 76800b57cec5SDimitry Andric KMP_FATAL(UnknownLibraryType, arg); 76810b57cec5SDimitry Andric } 76820b57cec5SDimitry Andric } 76830b57cec5SDimitry Andric 76840b57cec5SDimitry Andric /* Getting team information common for all team API */ 76850b57cec5SDimitry Andric // Returns NULL if not in teams construct 76860b57cec5SDimitry Andric static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) { 76870b57cec5SDimitry Andric kmp_info_t *thr = __kmp_entry_thread(); 76880b57cec5SDimitry Andric teams_serialized = 0; 76890b57cec5SDimitry Andric if (thr->th.th_teams_microtask) { 76900b57cec5SDimitry Andric kmp_team_t *team = thr->th.th_team; 76910b57cec5SDimitry Andric int tlevel = thr->th.th_teams_level; // the level of the teams construct 
76920b57cec5SDimitry Andric int ii = team->t.t_level; 76930b57cec5SDimitry Andric teams_serialized = team->t.t_serialized; 76940b57cec5SDimitry Andric int level = tlevel + 1; 76950b57cec5SDimitry Andric KMP_DEBUG_ASSERT(ii >= tlevel); 76960b57cec5SDimitry Andric while (ii > level) { 76970b57cec5SDimitry Andric for (teams_serialized = team->t.t_serialized; 76980b57cec5SDimitry Andric (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) { 76990b57cec5SDimitry Andric } 77000b57cec5SDimitry Andric if (team->t.t_serialized && (!teams_serialized)) { 77010b57cec5SDimitry Andric team = team->t.t_parent; 77020b57cec5SDimitry Andric continue; 77030b57cec5SDimitry Andric } 77040b57cec5SDimitry Andric if (ii > level) { 77050b57cec5SDimitry Andric team = team->t.t_parent; 77060b57cec5SDimitry Andric ii--; 77070b57cec5SDimitry Andric } 77080b57cec5SDimitry Andric } 77090b57cec5SDimitry Andric return team; 77100b57cec5SDimitry Andric } 77110b57cec5SDimitry Andric return NULL; 77120b57cec5SDimitry Andric } 77130b57cec5SDimitry Andric 77140b57cec5SDimitry Andric int __kmp_aux_get_team_num() { 77150b57cec5SDimitry Andric int serialized; 77160b57cec5SDimitry Andric kmp_team_t *team = __kmp_aux_get_team_info(serialized); 77170b57cec5SDimitry Andric if (team) { 77180b57cec5SDimitry Andric if (serialized > 1) { 77190b57cec5SDimitry Andric return 0; // teams region is serialized ( 1 team of 1 thread ). 
77200b57cec5SDimitry Andric } else { 77210b57cec5SDimitry Andric return team->t.t_master_tid; 77220b57cec5SDimitry Andric } 77230b57cec5SDimitry Andric } 77240b57cec5SDimitry Andric return 0; 77250b57cec5SDimitry Andric } 77260b57cec5SDimitry Andric 77270b57cec5SDimitry Andric int __kmp_aux_get_num_teams() { 77280b57cec5SDimitry Andric int serialized; 77290b57cec5SDimitry Andric kmp_team_t *team = __kmp_aux_get_team_info(serialized); 77300b57cec5SDimitry Andric if (team) { 77310b57cec5SDimitry Andric if (serialized > 1) { 77320b57cec5SDimitry Andric return 1; 77330b57cec5SDimitry Andric } else { 77340b57cec5SDimitry Andric return team->t.t_parent->t.t_nproc; 77350b57cec5SDimitry Andric } 77360b57cec5SDimitry Andric } 77370b57cec5SDimitry Andric return 1; 77380b57cec5SDimitry Andric } 77390b57cec5SDimitry Andric 77400b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 77410b57cec5SDimitry Andric 77420b57cec5SDimitry Andric /* 77430b57cec5SDimitry Andric * Affinity Format Parser 77440b57cec5SDimitry Andric * 77450b57cec5SDimitry Andric * Field is in form of: %[[[0].]size]type 77460b57cec5SDimitry Andric * % and type are required (%% means print a literal '%') 77470b57cec5SDimitry Andric * type is either single char or long name surrounded by {}, 77480b57cec5SDimitry Andric * e.g., N or {num_threads} 77490b57cec5SDimitry Andric * 0 => leading zeros 77500b57cec5SDimitry Andric * . 
=> right justified when size is specified 77510b57cec5SDimitry Andric * by default output is left justified 77520b57cec5SDimitry Andric * size is the *minimum* field length 77530b57cec5SDimitry Andric * All other characters are printed as is 77540b57cec5SDimitry Andric * 77550b57cec5SDimitry Andric * Available field types: 77560b57cec5SDimitry Andric * L {thread_level} - omp_get_level() 77570b57cec5SDimitry Andric * n {thread_num} - omp_get_thread_num() 77580b57cec5SDimitry Andric * h {host} - name of host machine 77590b57cec5SDimitry Andric * P {process_id} - process id (integer) 77600b57cec5SDimitry Andric * T {thread_identifier} - native thread identifier (integer) 77610b57cec5SDimitry Andric * N {num_threads} - omp_get_num_threads() 77620b57cec5SDimitry Andric * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1) 77630b57cec5SDimitry Andric * a {thread_affinity} - comma separated list of integers or integer ranges 77640b57cec5SDimitry Andric * (values of affinity mask) 77650b57cec5SDimitry Andric * 77660b57cec5SDimitry Andric * Implementation-specific field types can be added 77670b57cec5SDimitry Andric * If a type is unknown, print "undefined" 77680b57cec5SDimitry Andric */ 77690b57cec5SDimitry Andric 77700b57cec5SDimitry Andric // Structure holding the short name, long name, and corresponding data type 77710b57cec5SDimitry Andric // for snprintf. A table of these will represent the entire valid keyword 77720b57cec5SDimitry Andric // field types. 
77730b57cec5SDimitry Andric typedef struct kmp_affinity_format_field_t { 77740b57cec5SDimitry Andric char short_name; // from spec e.g., L -> thread level 77750b57cec5SDimitry Andric const char *long_name; // from spec thread_level -> thread level 77760b57cec5SDimitry Andric char field_format; // data type for snprintf (typically 'd' or 's' 77770b57cec5SDimitry Andric // for integer or string) 77780b57cec5SDimitry Andric } kmp_affinity_format_field_t; 77790b57cec5SDimitry Andric 77800b57cec5SDimitry Andric static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = { 77810b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 77820b57cec5SDimitry Andric {'A', "thread_affinity", 's'}, 77830b57cec5SDimitry Andric #endif 77840b57cec5SDimitry Andric {'t', "team_num", 'd'}, 77850b57cec5SDimitry Andric {'T', "num_teams", 'd'}, 77860b57cec5SDimitry Andric {'L', "nesting_level", 'd'}, 77870b57cec5SDimitry Andric {'n', "thread_num", 'd'}, 77880b57cec5SDimitry Andric {'N', "num_threads", 'd'}, 77890b57cec5SDimitry Andric {'a', "ancestor_tnum", 'd'}, 77900b57cec5SDimitry Andric {'H', "host", 's'}, 77910b57cec5SDimitry Andric {'P', "process_id", 'd'}, 77920b57cec5SDimitry Andric {'i', "native_thread_id", 'd'}}; 77930b57cec5SDimitry Andric 77940b57cec5SDimitry Andric // Return the number of characters it takes to hold field 77950b57cec5SDimitry Andric static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, 77960b57cec5SDimitry Andric const char **ptr, 77970b57cec5SDimitry Andric kmp_str_buf_t *field_buffer) { 77980b57cec5SDimitry Andric int rc, format_index, field_value; 77990b57cec5SDimitry Andric const char *width_left, *width_right; 78000b57cec5SDimitry Andric bool pad_zeros, right_justify, parse_long_name, found_valid_name; 78010b57cec5SDimitry Andric static const int FORMAT_SIZE = 20; 78020b57cec5SDimitry Andric char format[FORMAT_SIZE] = {0}; 78030b57cec5SDimitry Andric char absolute_short_name = 0; 78040b57cec5SDimitry Andric 
78050b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 78060b57cec5SDimitry Andric KMP_DEBUG_ASSERT(th); 78070b57cec5SDimitry Andric KMP_DEBUG_ASSERT(**ptr == '%'); 78080b57cec5SDimitry Andric KMP_DEBUG_ASSERT(field_buffer); 78090b57cec5SDimitry Andric 78100b57cec5SDimitry Andric __kmp_str_buf_clear(field_buffer); 78110b57cec5SDimitry Andric 78120b57cec5SDimitry Andric // Skip the initial % 78130b57cec5SDimitry Andric (*ptr)++; 78140b57cec5SDimitry Andric 78150b57cec5SDimitry Andric // Check for %% first 78160b57cec5SDimitry Andric if (**ptr == '%') { 78170b57cec5SDimitry Andric __kmp_str_buf_cat(field_buffer, "%", 1); 78180b57cec5SDimitry Andric (*ptr)++; // skip over the second % 78190b57cec5SDimitry Andric return 1; 78200b57cec5SDimitry Andric } 78210b57cec5SDimitry Andric 78220b57cec5SDimitry Andric // Parse field modifiers if they are present 78230b57cec5SDimitry Andric pad_zeros = false; 78240b57cec5SDimitry Andric if (**ptr == '0') { 78250b57cec5SDimitry Andric pad_zeros = true; 78260b57cec5SDimitry Andric (*ptr)++; // skip over 0 78270b57cec5SDimitry Andric } 78280b57cec5SDimitry Andric right_justify = false; 78290b57cec5SDimitry Andric if (**ptr == '.') { 78300b57cec5SDimitry Andric right_justify = true; 78310b57cec5SDimitry Andric (*ptr)++; // skip over . 
78320b57cec5SDimitry Andric } 78330b57cec5SDimitry Andric // Parse width of field: [width_left, width_right) 78340b57cec5SDimitry Andric width_left = width_right = NULL; 78350b57cec5SDimitry Andric if (**ptr >= '0' && **ptr <= '9') { 78360b57cec5SDimitry Andric width_left = *ptr; 78370b57cec5SDimitry Andric SKIP_DIGITS(*ptr); 78380b57cec5SDimitry Andric width_right = *ptr; 78390b57cec5SDimitry Andric } 78400b57cec5SDimitry Andric 78410b57cec5SDimitry Andric // Create the format for KMP_SNPRINTF based on flags parsed above 78420b57cec5SDimitry Andric format_index = 0; 78430b57cec5SDimitry Andric format[format_index++] = '%'; 78440b57cec5SDimitry Andric if (!right_justify) 78450b57cec5SDimitry Andric format[format_index++] = '-'; 78460b57cec5SDimitry Andric if (pad_zeros) 78470b57cec5SDimitry Andric format[format_index++] = '0'; 78480b57cec5SDimitry Andric if (width_left && width_right) { 78490b57cec5SDimitry Andric int i = 0; 78500b57cec5SDimitry Andric // Only allow 8 digit number widths. 
78510b57cec5SDimitry Andric // This also prevents overflowing format variable 78520b57cec5SDimitry Andric while (i < 8 && width_left < width_right) { 78530b57cec5SDimitry Andric format[format_index++] = *width_left; 78540b57cec5SDimitry Andric width_left++; 78550b57cec5SDimitry Andric i++; 78560b57cec5SDimitry Andric } 78570b57cec5SDimitry Andric } 78580b57cec5SDimitry Andric 78590b57cec5SDimitry Andric // Parse a name (long or short) 78600b57cec5SDimitry Andric // Canonicalize the name into absolute_short_name 78610b57cec5SDimitry Andric found_valid_name = false; 78620b57cec5SDimitry Andric parse_long_name = (**ptr == '{'); 78630b57cec5SDimitry Andric if (parse_long_name) 78640b57cec5SDimitry Andric (*ptr)++; // skip initial left brace 78650b57cec5SDimitry Andric for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) / 78660b57cec5SDimitry Andric sizeof(__kmp_affinity_format_table[0]); 78670b57cec5SDimitry Andric ++i) { 78680b57cec5SDimitry Andric char short_name = __kmp_affinity_format_table[i].short_name; 78690b57cec5SDimitry Andric const char *long_name = __kmp_affinity_format_table[i].long_name; 78700b57cec5SDimitry Andric char field_format = __kmp_affinity_format_table[i].field_format; 78710b57cec5SDimitry Andric if (parse_long_name) { 78720b57cec5SDimitry Andric int length = KMP_STRLEN(long_name); 78730b57cec5SDimitry Andric if (strncmp(*ptr, long_name, length) == 0) { 78740b57cec5SDimitry Andric found_valid_name = true; 78750b57cec5SDimitry Andric (*ptr) += length; // skip the long name 78760b57cec5SDimitry Andric } 78770b57cec5SDimitry Andric } else if (**ptr == short_name) { 78780b57cec5SDimitry Andric found_valid_name = true; 78790b57cec5SDimitry Andric (*ptr)++; // skip the short name 78800b57cec5SDimitry Andric } 78810b57cec5SDimitry Andric if (found_valid_name) { 78820b57cec5SDimitry Andric format[format_index++] = field_format; 78830b57cec5SDimitry Andric format[format_index++] = '\0'; 78840b57cec5SDimitry Andric absolute_short_name = short_name; 
78850b57cec5SDimitry Andric break; 78860b57cec5SDimitry Andric } 78870b57cec5SDimitry Andric } 78880b57cec5SDimitry Andric if (parse_long_name) { 78890b57cec5SDimitry Andric if (**ptr != '}') { 78900b57cec5SDimitry Andric absolute_short_name = 0; 78910b57cec5SDimitry Andric } else { 78920b57cec5SDimitry Andric (*ptr)++; // skip over the right brace 78930b57cec5SDimitry Andric } 78940b57cec5SDimitry Andric } 78950b57cec5SDimitry Andric 78960b57cec5SDimitry Andric // Attempt to fill the buffer with the requested 78970b57cec5SDimitry Andric // value using snprintf within __kmp_str_buf_print() 78980b57cec5SDimitry Andric switch (absolute_short_name) { 78990b57cec5SDimitry Andric case 't': 79000b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num()); 79010b57cec5SDimitry Andric break; 79020b57cec5SDimitry Andric case 'T': 79030b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams()); 79040b57cec5SDimitry Andric break; 79050b57cec5SDimitry Andric case 'L': 79060b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level); 79070b57cec5SDimitry Andric break; 79080b57cec5SDimitry Andric case 'n': 79090b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid)); 79100b57cec5SDimitry Andric break; 79110b57cec5SDimitry Andric case 'H': { 79120b57cec5SDimitry Andric static const int BUFFER_SIZE = 256; 79130b57cec5SDimitry Andric char buf[BUFFER_SIZE]; 79140b57cec5SDimitry Andric __kmp_expand_host_name(buf, BUFFER_SIZE); 79150b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, buf); 79160b57cec5SDimitry Andric } break; 79170b57cec5SDimitry Andric case 'P': 79180b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, getpid()); 79190b57cec5SDimitry Andric break; 79200b57cec5SDimitry Andric case 'i': 79210b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()); 
79220b57cec5SDimitry Andric break; 79230b57cec5SDimitry Andric case 'N': 79240b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc); 79250b57cec5SDimitry Andric break; 79260b57cec5SDimitry Andric case 'a': 79270b57cec5SDimitry Andric field_value = 79280b57cec5SDimitry Andric __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1); 79290b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, field_value); 79300b57cec5SDimitry Andric break; 79310b57cec5SDimitry Andric #if KMP_AFFINITY_SUPPORTED 79320b57cec5SDimitry Andric case 'A': { 79330b57cec5SDimitry Andric kmp_str_buf_t buf; 79340b57cec5SDimitry Andric __kmp_str_buf_init(&buf); 79350b57cec5SDimitry Andric __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); 79360b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, format, buf.str); 79370b57cec5SDimitry Andric __kmp_str_buf_free(&buf); 79380b57cec5SDimitry Andric } break; 79390b57cec5SDimitry Andric #endif 79400b57cec5SDimitry Andric default: 79410b57cec5SDimitry Andric // According to spec, If an implementation does not have info for field 79420b57cec5SDimitry Andric // type, then "undefined" is printed 79430b57cec5SDimitry Andric rc = __kmp_str_buf_print(field_buffer, "%s", "undefined"); 79440b57cec5SDimitry Andric // Skip the field 79450b57cec5SDimitry Andric if (parse_long_name) { 79460b57cec5SDimitry Andric SKIP_TOKEN(*ptr); 79470b57cec5SDimitry Andric if (**ptr == '}') 79480b57cec5SDimitry Andric (*ptr)++; 79490b57cec5SDimitry Andric } else { 79500b57cec5SDimitry Andric (*ptr)++; 79510b57cec5SDimitry Andric } 79520b57cec5SDimitry Andric } 79530b57cec5SDimitry Andric 79540b57cec5SDimitry Andric KMP_ASSERT(format_index <= FORMAT_SIZE); 79550b57cec5SDimitry Andric return rc; 79560b57cec5SDimitry Andric } 79570b57cec5SDimitry Andric 79580b57cec5SDimitry Andric /* 79590b57cec5SDimitry Andric * Return number of characters needed to hold the affinity string 79600b57cec5SDimitry Andric 
* (not including null byte character) 79610b57cec5SDimitry Andric * The resultant string is printed to buffer, which the caller can then 79620b57cec5SDimitry Andric * handle afterwards 79630b57cec5SDimitry Andric */ 79640b57cec5SDimitry Andric size_t __kmp_aux_capture_affinity(int gtid, const char *format, 79650b57cec5SDimitry Andric kmp_str_buf_t *buffer) { 79660b57cec5SDimitry Andric const char *parse_ptr; 79670b57cec5SDimitry Andric size_t retval; 79680b57cec5SDimitry Andric const kmp_info_t *th; 79690b57cec5SDimitry Andric kmp_str_buf_t field; 79700b57cec5SDimitry Andric 79710b57cec5SDimitry Andric KMP_DEBUG_ASSERT(buffer); 79720b57cec5SDimitry Andric KMP_DEBUG_ASSERT(gtid >= 0); 79730b57cec5SDimitry Andric 79740b57cec5SDimitry Andric __kmp_str_buf_init(&field); 79750b57cec5SDimitry Andric __kmp_str_buf_clear(buffer); 79760b57cec5SDimitry Andric 79770b57cec5SDimitry Andric th = __kmp_threads[gtid]; 79780b57cec5SDimitry Andric retval = 0; 79790b57cec5SDimitry Andric 79800b57cec5SDimitry Andric // If format is NULL or zero-length string, then we use 79810b57cec5SDimitry Andric // affinity-format-var ICV 79820b57cec5SDimitry Andric parse_ptr = format; 79830b57cec5SDimitry Andric if (parse_ptr == NULL || *parse_ptr == '\0') { 79840b57cec5SDimitry Andric parse_ptr = __kmp_affinity_format; 79850b57cec5SDimitry Andric } 79860b57cec5SDimitry Andric KMP_DEBUG_ASSERT(parse_ptr); 79870b57cec5SDimitry Andric 79880b57cec5SDimitry Andric while (*parse_ptr != '\0') { 79890b57cec5SDimitry Andric // Parse a field 79900b57cec5SDimitry Andric if (*parse_ptr == '%') { 79910b57cec5SDimitry Andric // Put field in the buffer 79920b57cec5SDimitry Andric int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field); 79930b57cec5SDimitry Andric __kmp_str_buf_catbuf(buffer, &field); 79940b57cec5SDimitry Andric retval += rc; 79950b57cec5SDimitry Andric } else { 79960b57cec5SDimitry Andric // Put literal character in buffer 79970b57cec5SDimitry Andric __kmp_str_buf_cat(buffer, 
parse_ptr, 1); 79980b57cec5SDimitry Andric retval++; 79990b57cec5SDimitry Andric parse_ptr++; 80000b57cec5SDimitry Andric } 80010b57cec5SDimitry Andric } 80020b57cec5SDimitry Andric __kmp_str_buf_free(&field); 80030b57cec5SDimitry Andric return retval; 80040b57cec5SDimitry Andric } 80050b57cec5SDimitry Andric 80060b57cec5SDimitry Andric // Displays the affinity string to stdout 80070b57cec5SDimitry Andric void __kmp_aux_display_affinity(int gtid, const char *format) { 80080b57cec5SDimitry Andric kmp_str_buf_t buf; 80090b57cec5SDimitry Andric __kmp_str_buf_init(&buf); 80100b57cec5SDimitry Andric __kmp_aux_capture_affinity(gtid, format, &buf); 80110b57cec5SDimitry Andric __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str); 80120b57cec5SDimitry Andric __kmp_str_buf_free(&buf); 80130b57cec5SDimitry Andric } 80140b57cec5SDimitry Andric 80150b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 80160b57cec5SDimitry Andric 80170b57cec5SDimitry Andric void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { 80180b57cec5SDimitry Andric int blocktime = arg; /* argument is in milliseconds */ 80190b57cec5SDimitry Andric #if KMP_USE_MONITOR 80200b57cec5SDimitry Andric int bt_intervals; 80210b57cec5SDimitry Andric #endif 80220b57cec5SDimitry Andric int bt_set; 80230b57cec5SDimitry Andric 80240b57cec5SDimitry Andric __kmp_save_internal_controls(thread); 80250b57cec5SDimitry Andric 80260b57cec5SDimitry Andric /* Normalize and set blocktime for the teams */ 80270b57cec5SDimitry Andric if (blocktime < KMP_MIN_BLOCKTIME) 80280b57cec5SDimitry Andric blocktime = KMP_MIN_BLOCKTIME; 80290b57cec5SDimitry Andric else if (blocktime > KMP_MAX_BLOCKTIME) 80300b57cec5SDimitry Andric blocktime = KMP_MAX_BLOCKTIME; 80310b57cec5SDimitry Andric 80320b57cec5SDimitry Andric set__blocktime_team(thread->th.th_team, tid, blocktime); 80330b57cec5SDimitry Andric set__blocktime_team(thread->th.th_serial_team, 0, blocktime); 80340b57cec5SDimitry 
Andric 80350b57cec5SDimitry Andric #if KMP_USE_MONITOR 80360b57cec5SDimitry Andric /* Calculate and set blocktime intervals for the teams */ 80370b57cec5SDimitry Andric bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); 80380b57cec5SDimitry Andric 80390b57cec5SDimitry Andric set__bt_intervals_team(thread->th.th_team, tid, bt_intervals); 80400b57cec5SDimitry Andric set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals); 80410b57cec5SDimitry Andric #endif 80420b57cec5SDimitry Andric 80430b57cec5SDimitry Andric /* Set whether blocktime has been set to "TRUE" */ 80440b57cec5SDimitry Andric bt_set = TRUE; 80450b57cec5SDimitry Andric 80460b57cec5SDimitry Andric set__bt_set_team(thread->th.th_team, tid, bt_set); 80470b57cec5SDimitry Andric set__bt_set_team(thread->th.th_serial_team, 0, bt_set); 80480b57cec5SDimitry Andric #if KMP_USE_MONITOR 80490b57cec5SDimitry Andric KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 80500b57cec5SDimitry Andric "bt_intervals=%d, monitor_updates=%d\n", 80510b57cec5SDimitry Andric __kmp_gtid_from_tid(tid, thread->th.th_team), 80520b57cec5SDimitry Andric thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, 80530b57cec5SDimitry Andric __kmp_monitor_wakeups)); 80540b57cec5SDimitry Andric #else 80550b57cec5SDimitry Andric KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", 80560b57cec5SDimitry Andric __kmp_gtid_from_tid(tid, thread->th.th_team), 80570b57cec5SDimitry Andric thread->th.th_team->t.t_id, tid, blocktime)); 80580b57cec5SDimitry Andric #endif 80590b57cec5SDimitry Andric } 80600b57cec5SDimitry Andric 80610b57cec5SDimitry Andric void __kmp_aux_set_defaults(char const *str, int len) { 80620b57cec5SDimitry Andric if (!__kmp_init_serial) { 80630b57cec5SDimitry Andric __kmp_serial_initialize(); 80640b57cec5SDimitry Andric } 80650b57cec5SDimitry Andric __kmp_env_initialize(str); 80660b57cec5SDimitry Andric 80670b57cec5SDimitry Andric if (__kmp_settings || 
__kmp_display_env || __kmp_display_env_verbose) { 80680b57cec5SDimitry Andric __kmp_env_print(); 80690b57cec5SDimitry Andric } 80700b57cec5SDimitry Andric } // __kmp_aux_set_defaults 80710b57cec5SDimitry Andric 80720b57cec5SDimitry Andric /* ------------------------------------------------------------------------ */ 80730b57cec5SDimitry Andric /* internal fast reduction routines */ 80740b57cec5SDimitry Andric 80750b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T 80760b57cec5SDimitry Andric __kmp_determine_reduction_method( 80770b57cec5SDimitry Andric ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 80780b57cec5SDimitry Andric void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 80790b57cec5SDimitry Andric kmp_critical_name *lck) { 80800b57cec5SDimitry Andric 80810b57cec5SDimitry Andric // Default reduction method: critical construct ( lck != NULL, like in current 80820b57cec5SDimitry Andric // PAROPT ) 80830b57cec5SDimitry Andric // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method 80840b57cec5SDimitry Andric // can be selected by RTL 80850b57cec5SDimitry Andric // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method 80860b57cec5SDimitry Andric // can be selected by RTL 80870b57cec5SDimitry Andric // Finally, it's up to OpenMP RTL to make a decision on which method to select 80880b57cec5SDimitry Andric // among generated by PAROPT. 
80890b57cec5SDimitry Andric 80900b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T retval; 80910b57cec5SDimitry Andric 80920b57cec5SDimitry Andric int team_size; 80930b57cec5SDimitry Andric 80940b57cec5SDimitry Andric KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 ) 80950b57cec5SDimitry Andric KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 ) 80960b57cec5SDimitry Andric 80970b57cec5SDimitry Andric #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ 80980b57cec5SDimitry Andric ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)) 80990b57cec5SDimitry Andric #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) 81000b57cec5SDimitry Andric 81010b57cec5SDimitry Andric retval = critical_reduce_block; 81020b57cec5SDimitry Andric 81030b57cec5SDimitry Andric // another choice of getting a team size (with 1 dynamic deference) is slower 81040b57cec5SDimitry Andric team_size = __kmp_get_team_num_threads(global_tid); 81050b57cec5SDimitry Andric if (team_size == 1) { 81060b57cec5SDimitry Andric 81070b57cec5SDimitry Andric retval = empty_reduce_block; 81080b57cec5SDimitry Andric 81090b57cec5SDimitry Andric } else { 81100b57cec5SDimitry Andric 81110b57cec5SDimitry Andric int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 81120b57cec5SDimitry Andric 8113489b1cf2SDimitry Andric #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ 8114489b1cf2SDimitry Andric KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 81150b57cec5SDimitry Andric 81160b57cec5SDimitry Andric #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ 81170b57cec5SDimitry Andric KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD 81180b57cec5SDimitry Andric 81190b57cec5SDimitry Andric int teamsize_cutoff = 4; 81200b57cec5SDimitry Andric 81210b57cec5SDimitry Andric #if KMP_MIC_SUPPORTED 81220b57cec5SDimitry Andric if (__kmp_mic_type != non_mic) { 81230b57cec5SDimitry Andric teamsize_cutoff = 8; 
81240b57cec5SDimitry Andric } 81250b57cec5SDimitry Andric #endif 81260b57cec5SDimitry Andric int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 81270b57cec5SDimitry Andric if (tree_available) { 81280b57cec5SDimitry Andric if (team_size <= teamsize_cutoff) { 81290b57cec5SDimitry Andric if (atomic_available) { 81300b57cec5SDimitry Andric retval = atomic_reduce_block; 81310b57cec5SDimitry Andric } 81320b57cec5SDimitry Andric } else { 81330b57cec5SDimitry Andric retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 81340b57cec5SDimitry Andric } 81350b57cec5SDimitry Andric } else if (atomic_available) { 81360b57cec5SDimitry Andric retval = atomic_reduce_block; 81370b57cec5SDimitry Andric } 81380b57cec5SDimitry Andric #else 81390b57cec5SDimitry Andric #error "Unknown or unsupported OS" 81400b57cec5SDimitry Andric #endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || 81410b57cec5SDimitry Andric // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD 81420b57cec5SDimitry Andric 81430b57cec5SDimitry Andric #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 81440b57cec5SDimitry Andric 81450b57cec5SDimitry Andric #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD 81460b57cec5SDimitry Andric 81470b57cec5SDimitry Andric // basic tuning 81480b57cec5SDimitry Andric 81490b57cec5SDimitry Andric if (atomic_available) { 81500b57cec5SDimitry Andric if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ??? 
81510b57cec5SDimitry Andric retval = atomic_reduce_block; 81520b57cec5SDimitry Andric } 81530b57cec5SDimitry Andric } // otherwise: use critical section 81540b57cec5SDimitry Andric 81550b57cec5SDimitry Andric #elif KMP_OS_DARWIN 81560b57cec5SDimitry Andric 81570b57cec5SDimitry Andric int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 81580b57cec5SDimitry Andric if (atomic_available && (num_vars <= 3)) { 81590b57cec5SDimitry Andric retval = atomic_reduce_block; 81600b57cec5SDimitry Andric } else if (tree_available) { 81610b57cec5SDimitry Andric if ((reduce_size > (9 * sizeof(kmp_real64))) && 81620b57cec5SDimitry Andric (reduce_size < (2000 * sizeof(kmp_real64)))) { 81630b57cec5SDimitry Andric retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 81640b57cec5SDimitry Andric } 81650b57cec5SDimitry Andric } // otherwise: use critical section 81660b57cec5SDimitry Andric 81670b57cec5SDimitry Andric #else 81680b57cec5SDimitry Andric #error "Unknown or unsupported OS" 81690b57cec5SDimitry Andric #endif 81700b57cec5SDimitry Andric 81710b57cec5SDimitry Andric #else 81720b57cec5SDimitry Andric #error "Unknown or unsupported architecture" 81730b57cec5SDimitry Andric #endif 81740b57cec5SDimitry Andric } 81750b57cec5SDimitry Andric 81760b57cec5SDimitry Andric // KMP_FORCE_REDUCTION 81770b57cec5SDimitry Andric 81780b57cec5SDimitry Andric // If the team is serialized (team_size == 1), ignore the forced reduction 81790b57cec5SDimitry Andric // method and stay with the unsynchronized method (empty_reduce_block) 81800b57cec5SDimitry Andric if (__kmp_force_reduction_method != reduction_method_not_defined && 81810b57cec5SDimitry Andric team_size != 1) { 81820b57cec5SDimitry Andric 81830b57cec5SDimitry Andric PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 81840b57cec5SDimitry Andric 81850b57cec5SDimitry Andric int atomic_available, tree_available; 81860b57cec5SDimitry Andric 81870b57cec5SDimitry Andric switch ((forced_retval = __kmp_force_reduction_method)) { 
81880b57cec5SDimitry Andric case critical_reduce_block: 81890b57cec5SDimitry Andric KMP_ASSERT(lck); // lck should be != 0 81900b57cec5SDimitry Andric break; 81910b57cec5SDimitry Andric 81920b57cec5SDimitry Andric case atomic_reduce_block: 81930b57cec5SDimitry Andric atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 81940b57cec5SDimitry Andric if (!atomic_available) { 81950b57cec5SDimitry Andric KMP_WARNING(RedMethodNotSupported, "atomic"); 81960b57cec5SDimitry Andric forced_retval = critical_reduce_block; 81970b57cec5SDimitry Andric } 81980b57cec5SDimitry Andric break; 81990b57cec5SDimitry Andric 82000b57cec5SDimitry Andric case tree_reduce_block: 82010b57cec5SDimitry Andric tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 82020b57cec5SDimitry Andric if (!tree_available) { 82030b57cec5SDimitry Andric KMP_WARNING(RedMethodNotSupported, "tree"); 82040b57cec5SDimitry Andric forced_retval = critical_reduce_block; 82050b57cec5SDimitry Andric } else { 82060b57cec5SDimitry Andric #if KMP_FAST_REDUCTION_BARRIER 82070b57cec5SDimitry Andric forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 82080b57cec5SDimitry Andric #endif 82090b57cec5SDimitry Andric } 82100b57cec5SDimitry Andric break; 82110b57cec5SDimitry Andric 82120b57cec5SDimitry Andric default: 82130b57cec5SDimitry Andric KMP_ASSERT(0); // "unsupported method specified" 82140b57cec5SDimitry Andric } 82150b57cec5SDimitry Andric 82160b57cec5SDimitry Andric retval = forced_retval; 82170b57cec5SDimitry Andric } 82180b57cec5SDimitry Andric 82190b57cec5SDimitry Andric KA_TRACE(10, ("reduction method selected=%08x\n", retval)); 82200b57cec5SDimitry Andric 82210b57cec5SDimitry Andric #undef FAST_REDUCTION_TREE_METHOD_GENERATED 82220b57cec5SDimitry Andric #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 82230b57cec5SDimitry Andric 82240b57cec5SDimitry Andric return (retval); 82250b57cec5SDimitry Andric } 82260b57cec5SDimitry Andric 82270b57cec5SDimitry Andric // this function is for testing 
set/get/determine reduce method 82280b57cec5SDimitry Andric kmp_int32 __kmp_get_reduce_method(void) { 82290b57cec5SDimitry Andric return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 82300b57cec5SDimitry Andric } 82310b57cec5SDimitry Andric 82320b57cec5SDimitry Andric // Soft pause sets up threads to ignore blocktime and just go to sleep. 82330b57cec5SDimitry Andric // Spin-wait code checks __kmp_pause_status and reacts accordingly. 82340b57cec5SDimitry Andric void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } 82350b57cec5SDimitry Andric 82360b57cec5SDimitry Andric // Hard pause shuts down the runtime completely. Resume happens naturally when 82370b57cec5SDimitry Andric // OpenMP is used subsequently. 82380b57cec5SDimitry Andric void __kmp_hard_pause() { 82390b57cec5SDimitry Andric __kmp_pause_status = kmp_hard_paused; 82400b57cec5SDimitry Andric __kmp_internal_end_thread(-1); 82410b57cec5SDimitry Andric } 82420b57cec5SDimitry Andric 82430b57cec5SDimitry Andric // Soft resume sets __kmp_pause_status, and wakes up all threads. 
82440b57cec5SDimitry Andric void __kmp_resume_if_soft_paused() { 82450b57cec5SDimitry Andric if (__kmp_pause_status == kmp_soft_paused) { 82460b57cec5SDimitry Andric __kmp_pause_status = kmp_not_paused; 82470b57cec5SDimitry Andric 82480b57cec5SDimitry Andric for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) { 82490b57cec5SDimitry Andric kmp_info_t *thread = __kmp_threads[gtid]; 82500b57cec5SDimitry Andric if (thread) { // Wake it if sleeping 82510b57cec5SDimitry Andric kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); 82520b57cec5SDimitry Andric if (fl.is_sleeping()) 82530b57cec5SDimitry Andric fl.resume(gtid); 82540b57cec5SDimitry Andric else if (__kmp_try_suspend_mx(thread)) { // got suspend lock 82550b57cec5SDimitry Andric __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep 82560b57cec5SDimitry Andric } else { // thread holds the lock and may sleep soon 82570b57cec5SDimitry Andric do { // until either the thread sleeps, or we can get the lock 82580b57cec5SDimitry Andric if (fl.is_sleeping()) { 82590b57cec5SDimitry Andric fl.resume(gtid); 82600b57cec5SDimitry Andric break; 82610b57cec5SDimitry Andric } else if (__kmp_try_suspend_mx(thread)) { 82620b57cec5SDimitry Andric __kmp_unlock_suspend_mx(thread); 82630b57cec5SDimitry Andric break; 82640b57cec5SDimitry Andric } 82650b57cec5SDimitry Andric } while (1); 82660b57cec5SDimitry Andric } 82670b57cec5SDimitry Andric } 82680b57cec5SDimitry Andric } 82690b57cec5SDimitry Andric } 82700b57cec5SDimitry Andric } 82710b57cec5SDimitry Andric 82720b57cec5SDimitry Andric // This function is called via __kmpc_pause_resource. Returns 0 if successful. 
82730b57cec5SDimitry Andric // TODO: add warning messages 82740b57cec5SDimitry Andric int __kmp_pause_resource(kmp_pause_status_t level) { 82750b57cec5SDimitry Andric if (level == kmp_not_paused) { // requesting resume 82760b57cec5SDimitry Andric if (__kmp_pause_status == kmp_not_paused) { 82770b57cec5SDimitry Andric // error message about runtime not being paused, so can't resume 82780b57cec5SDimitry Andric return 1; 82790b57cec5SDimitry Andric } else { 82800b57cec5SDimitry Andric KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused || 82810b57cec5SDimitry Andric __kmp_pause_status == kmp_hard_paused); 82820b57cec5SDimitry Andric __kmp_pause_status = kmp_not_paused; 82830b57cec5SDimitry Andric return 0; 82840b57cec5SDimitry Andric } 82850b57cec5SDimitry Andric } else if (level == kmp_soft_paused) { // requesting soft pause 82860b57cec5SDimitry Andric if (__kmp_pause_status != kmp_not_paused) { 82870b57cec5SDimitry Andric // error message about already being paused 82880b57cec5SDimitry Andric return 1; 82890b57cec5SDimitry Andric } else { 82900b57cec5SDimitry Andric __kmp_soft_pause(); 82910b57cec5SDimitry Andric return 0; 82920b57cec5SDimitry Andric } 82930b57cec5SDimitry Andric } else if (level == kmp_hard_paused) { // requesting hard pause 82940b57cec5SDimitry Andric if (__kmp_pause_status != kmp_not_paused) { 82950b57cec5SDimitry Andric // error message about already being paused 82960b57cec5SDimitry Andric return 1; 82970b57cec5SDimitry Andric } else { 82980b57cec5SDimitry Andric __kmp_hard_pause(); 82990b57cec5SDimitry Andric return 0; 83000b57cec5SDimitry Andric } 83010b57cec5SDimitry Andric } else { 83020b57cec5SDimitry Andric // error message about invalid level 83030b57cec5SDimitry Andric return 1; 83040b57cec5SDimitry Andric } 83050b57cec5SDimitry Andric } 8306