# Author: Alfio Lazzaro, alazzaro@cray.com (2013)

# Use: module load PrgEnv-cray
# Remember to unload any GPU-specific modules first, e.g. module unload craype-accel-nvidia35 cudatoolkit

#
# Target compiler: the command line used to build the library itself.
# The options should be aggressive enough to e.g. perform vectorization for the
# specific CPU (with GNU compilers: -ftree-vectorize -march=native), and allow
# some flexibility in reordering floating point expressions (GNU: -ffast-math).
# Higher level optimisation (in particular loop nest optimization) should not be used.
#
# NOTE(review): flag meanings below are taken from the Cray Fortran (crayftn)
# documentation; confirm against the installed CCE version:
#   -O2           default optimisation level
#   -hfp3         aggressive floating-point optimisation
#   -hnodwarf     do not emit DWARF debug information
#   -Onopattern   disable pattern matching (replacement of loops by library calls)
#   -hvector1     conservative vectorisation level
#   -eF -ffree    preprocessor macro expansion, free-form source
#
target_compile="ftn -O2 -hfp3 -hnodwarf -Onopattern -hvector1 -eF -ffree"

#
# Target dgemm link options: these are the options needed to link BLAS (e.g. -lblas).
# BLAS is used as a fallback for sizes not included in the library, or in those cases where it is faster.
# The same BLAS library should thus also be used when libsmm is linked.
#
# With ftn, libsci is used, so there is no need to set blas_linking.
#

#
# Host compiler: used only to compile a few helper tools needed while building
# the library. The library itself is NOT compiled with this command.
# The compiler must be able to handle some Fortran 2003 constructs.
#
host_compile="gfortran-4.6 -O2"

#
# Show affinity mask.
# Exported so that processes started from the sourcing shell inherit it.
# NOTE(review): presumably read by the Cray OpenMP runtime to report the
# CPU affinity of each thread at start-up; confirm against the CCE docs.
#
export CRAY_OMP_CHECK_AFFINITY=TRUE

#
# Set the aprun command and its options for batch submission.
#
# ${ntasks} is expanded here, at the time this file is sourced, so the
# sourcing script has to define it beforehand. If it is unset or empty the
# depth falls back to 1; otherwise "-d" would be left without an argument
# and the resulting command line would be malformed.
# NOTE(review): 1 is assumed to match aprun's own default depth; confirm
# against the aprun man page.
#
aprun_cmd="aprun -n 1 -N 1 -d ${ntasks:-1} -r 1"
