1# Author: Alfio Lazzaro, alazzaro@cray.com (2013)
2
3# Use: module load PrgEnv-intel
4# Remember to remove any module specific for GPU, e.g. module unload craype-accel-nvidia35 cudatoolkit
5
6#
7# target compiler... these are the options used for building the library.
# They should be aggressive enough to e.g. perform vectorization for the specific CPU (e.g. -ftree-vectorize -march=native),
9# and allow some flexibility in reordering floating point expressions (-ffast-math).
10# Higher level optimisation (in particular loop nest optimization) should not be used.
11#
# Assembled in two steps to keep the lines short; the resulting string is
# the same single space-separated command line as before.
target_compile="ftn -O2 -funroll-loops -warn -fpp -finline-functions"
target_compile="${target_compile} -nogen-interfaces -openmp -nolib-inline -no-offload"
13
14#
15# target dgemm link options... these are the options needed to link blas (e.g. -lblas)
# BLAS is used as a fallback option for sizes not included in the library, or in those cases where it is faster;
# the same BLAS library should therefore also be used when libsmm is linked.
18#
19# Use libsci when using ftn, therefore no need to set blas_linking
20
21#
22# host compiler... this is used only to compile a few tools needed to build
23# the library. The library itself is not compiled this way.
24# This compiler needs to be able to deal with some Fortran2003 constructs.
25#
# Compiler driver first, then its optimisation level appended to it.
host_compile='ifort'
host_compile="${host_compile} -O2"
27
28#
29# Set KMP affinity
30#
# Assign first, then mark for export, so the value is passed on to the
# environment of child processes.
KMP_AFFINITY='verbose,granularity=fine,scatter'
export KMP_AFFINITY
32
33#
34# Set the aprun command and its options for batch submission
35#
# ${ntasks} is not assigned in this file; it is expanded here, at the moment
# the file is sourced, so it has to be set beforehand by whatever sources it
# (NOTE(review): presumably the libsmm driver script -- confirm).
# Fall back to 1 when it is unset or empty so that "-d" is never left
# without a value in the resulting command line.
aprun_cmd="aprun -n 1 -N 1 -d ${ntasks:-1} -r 1 -cc none"
37