# Author: Alfio Lazzaro, alazzaro@cray.com (2014)
#
# Build/run configuration for executing the kernels on the Intel Xeon Phi in
# offload mode. This file only defines variables and exports environment
# settings; it is meant to be sourced, not executed.
# Remember to set the variable target_compile_offload.

#
# target compiler... these are the options used for building the Intel Xeon Phi kernels.
# They should be aggressive enough to e.g. perform vectorization,
# and allow some flexibility in reordering floating point expressions.
# Higher level optimisation (in particular loop nest optimization) should not
# be used.
# Mandatory to add "-offload-attribute-target=mic"
#
target_compile="ftn -O2 -funroll-loops -warn -offload-attribute-target=mic -fpp -finline-functions -nogen-interfaces -openmp"

#
# target compiler used to build the CPU driven code for the offload execution
# of the Intel Xeon Phi kernels.
# You can reuse most of the flags defined in the target_compile variable.
# Replace "-offload-attribute-target=mic" with "-offload=mandatory".
#
target_compile_offload="ftn -O2 -funroll-loops -warn -offload=mandatory -watch=mic-cmd -fpp -finline-functions -nogen-interfaces -openmp"

#
# target dgemm link options... these are the options needed to link blas (e.g. -lblas)
# blas is used as a fall back option for sizes not included in the library or in those cases where it is faster
# the same blas library should thus also be used when libsmm is linked.
#
blas_linking="-mkl=sequential"

#
# SIMD registers type (bytes): sse (16), avx (32), knc (64)
#
SIMD=knc

#
# Number of threads to be used on the card.
# Do not use hyperthreads!
#
MIC_OMP_NUM_THREADS=58

#
# host compiler... this is used only to compile a few tools needed to build
# the library. The library itself is not compiled this way.
# This compiler needs to be able to deal with some Fortran2003 constructs.
#
host_compile="ftn -O2"

#
# Set KMP affinity and other variables
#
export KMP_AFFINITY=verbose,granularity=fine,scatter
export OFFLOAD_REPORT=3
export OMP_STACKSIZE=256M
export MIC_STACKSIZE=256M
export CRAYPE_LINK_TYPE=dynamic

#
# Set the aprun command and its options for batch submission
#
# NOTE(review): ntasks is not defined in this file; presumably the script that
# sources this configuration sets it beforehand -- confirm against the caller.
# The ":-1" fallback keeps the command line well-formed when ntasks is unset
# or empty (otherwise "-d" would be left without a value and would consume
# "-r" as its argument). When ntasks is set, the result is unchanged.
#
aprun_cmd="aprun -n 1 -N 1 -d ${ntasks:-1} -r 1 -cc none"