1# Author: Alfio Lazzaro, alazzaro@cray.com (2014)
2#
# This file uses offload mode to execute the kernels on the Intel Xeon Phi.
# Remember to set the variable target_compile_offload.
5
#
# Target compiler: the command line used to build the Intel Xeon Phi kernels.
# The options should be aggressive enough to, e.g., perform vectorization and
# to allow some flexibility in reordering floating-point expressions.
# Higher-level optimisation (in particular loop-nest optimisation) should not
# be used.
# Adding "-offload-attribute-target=mic" is mandatory.
#
target_compile='ifort -O2 -funroll-loops -warn -offload-attribute-target=mic -fpp -finline-functions -nogen-interfaces -openmp'
15
#
# Offload compiler: the command line used to build the CPU-driven (host side)
# code that offloads the execution of the Intel Xeon Phi kernels.
# Most of the flags from target_compile can be reused here; replace
# "-offload-attribute-target=mic" with "-offload=mandatory".
#
target_compile_offload='ifort -O2 -funroll-loops -warn -offload=mandatory -watch=mic-cmd -fpp -finline-functions -nogen-interfaces -openmp'
23
#
# DGEMM link options: whatever is needed to link BLAS (e.g. -lblas).
# BLAS is the fallback for sizes that are not included in the library and for
# the cases where BLAS is faster. The same BLAS library should therefore also
# be used when libsmm is linked.
#
blas_linking='-mkl=sequential'
30
#
# SIMD register type, with the register width in bytes:
#   sse (16), avx (32), knc (64)
#
SIMD="knc"
35
#
# Number of threads to be used on the card.
# Do not use hyperthreads!
#
MIC_OMP_NUM_THREADS="58"
41
#
# Host compiler: only used to compile a few tools that are needed to build
# the library; the library itself is not compiled with it.
# It must be able to deal with some Fortran 2003 constructs.
#
host_compile='ifort -O2'
48
#
# Runtime environment: KMP affinity, offload reporting and stack sizes.
# The values are assigned first and then exported together so that they are
# visible to child processes.
#
KMP_AFFINITY='verbose,granularity=fine,scatter'
OFFLOAD_REPORT='3'
OMP_STACKSIZE='256M'
MIC_STACKSIZE='256M'
export KMP_AFFINITY OFFLOAD_REPORT OMP_STACKSIZE MIC_STACKSIZE
56