1# ROOTDIR avoid abspath to match Makefile targets
2ROOTDIR = $(subst //,$(NULL),$(dir $(firstword $(MAKEFILE_LIST)))/)
3INCDIR = include
4SCRDIR = scripts
5TSTDIR = tests
6BLDDIR = obj
7SRCDIR = src
8OUTDIR = lib
9BINDIR = bin
10SPLDIR = samples
11DOCDIR = documentation
12
13# subdirectories (relative) to PREFIX (install targets)
14PINCDIR ?= $(INCDIR)
15PSRCDIR ?= $(PINCDIR)/libxsmm
16POUTDIR ?= $(OUTDIR)
17PPKGDIR ?= $(OUTDIR)
18PMODDIR ?= $(OUTDIR)
19PBINDIR ?= $(BINDIR)
20PTSTDIR ?= $(TSTDIR)
21PDOCDIR ?= share/libxsmm
22LICFDIR ?= $(PDOCDIR)
23LICFILE ?= LICENSE.md
24
25# initial default flags: RPM_OPT_FLAGS are usually NULL
26CFLAGS = $(RPM_OPT_FLAGS)
27CXXFLAGS = $(RPM_OPT_FLAGS)
28FCFLAGS = $(RPM_OPT_FLAGS)
29
30CFLAGS += -fcommon # multiple definition of `libxsmm_scratch_pools' (and other symbols) https://github.com/hfp/libxsmm/issues/412
31
32# THRESHOLD problem size (M x N x K) determining when to use BLAS
33# A value of zero (0) populates a default threshold
34THRESHOLD ?= 0
35
# Generates M,N,K-combinations for each comma separated group, e.g., "1, 2, 3" generates (1,1,1), (2,2,2),
# and (3,3,3). This way a heterogeneous set can be generated, e.g., "1 2, 3" generates (1,1,1), (1,1,2),
# (1,2,1), (1,2,2), (2,1,1), (2,1,2), (2,2,1), (2,2,2) out of the first group, and (3,3,3) for the second group.
39# To generate a series of square matrices one can specify, e.g., make MNK=$(echo $(seq -s, 1 5))
40# Alternative to MNK, index sets can be specified separately according to a loop nest relationship
41# (M(N(K))) using M, N, and K separately. Please consult the documentation for further details.
42MNK ?= 0
43
44# Enable thread-local cache of recently dispatched kernels either
45# 0: "disable", 1: "enable", or small power-of-two number.
46CACHE ?= 1
47
# Issue software prefetch instructions (see end of section
# https://github.com/hfp/libxsmm/#generator-driver)
# Use the enumerator listed below, or the exact strategy
# name pfsigonly...AL2_BL2viaC.
# 0: no prefetch (nopf)
# 1: auto-select
# 2: pfsigonly
# 3: BL2viaC
# 4: curAL2
# 5: curAL2_BL2viaC
# 6: AL2
# 7: AL2_BL2viaC
60PREFETCH ?= 1
61
62# Preferred precision when registering statically generated code versions
63# 0: SP and DP code versions to be registered
64# 1: SP only
65# 2: DP only
66PRECISION ?= 0
67
68# Specify the size of a cacheline (Bytes)
69CACHELINE ?= 64
70
71# Alpha argument of GEMM
72# Supported: 1.0
73ALPHA ?= 1
74ifneq (1,$(ALPHA))
75  $(info --------------------------------------------------------------------------------)
76  $(error ALPHA needs to be 1)
77endif
78
79# Beta argument of GEMM
80# Supported: 0.0, 1.0
81# 0: C  = A * B
82# 1: C += A * B
83BETA ?= 1
84ifneq (1,$(BETA))
85ifneq (0,$(BETA))
86  $(info --------------------------------------------------------------------------------)
87  $(error BETA needs to be either 0 or 1)
88endif
89endif
90
91# Determines if the library is thread-safe
92THREADS ?= 1
93
94# 0: shared libraries files suitable for dynamic linkage
95# 1: library archives suitable for static linkage
96STATIC ?= 1
97
98# 0: link all dependencies as specified for the target
99# 1: attempt to avoid dependencies if not referenced
100ASNEEDED ?= 0
101
102# 0: build according to the value of STATIC
103# 1: build according to STATIC=0 and STATIC=1
104SHARED ?= 0
105
106# -1: support intercepted malloc (disabled at runtime by default)
107#  0: disable intercepted malloc at compile-time
108# >0: enable intercepted malloc
109MALLOC ?= 0
110
111# Determines the kind of routine called for intercepted GEMMs
112# >=1 and odd : sequential and non-tiled (small problem sizes only)
113# >=2 and even: parallelized and tiled (all problem sizes)
114# >=3 and odd : GEMV is intercepted; small problem sizes
115# >=4 and even: GEMV is intercepted; all problem sizes
116# negative: BLAS provides DGEMM_BATCH and SGEMM_BATCH
117# 0: disabled
118WRAP ?= 1
119
# JIT backend is enabled by default
# The shell pipeline prints PLATFORM if it is an (optionally negative) integer,
# and prints "0" otherwise (grep finds no match). Consequently, JIT defaults to 1
# when PLATFORM is unset, non-numeric, or exactly 0; any other integer value of
# PLATFORM makes JIT default to 0. An explicitly given JIT always wins (?=).
ifeq (0,$(shell echo "$(PLATFORM)" | grep "^-*[0-9][0-9]*$$" 2>/dev/null || echo "0")) # NaN
  JIT ?= 1
else # disabled if platform is forced
# enable: make PLATFORM=1 JIT=1
  JIT ?= 0
endif
127
128# TRACE facility
129INSTRUMENT ?= $(TRACE)
130
131# target library for a broad range of systems
132ifneq (0,$(JIT))
133  SSE ?= 1
134endif
135
136ifneq (,$(MAXTARGET))
137  DFLAGS += -DLIBXSMM_MAXTARGET=$(MAXTARGET)
138endif
139
# Profiling JIT code using Linux Perf
# PERF=0: disabled (default)
# PERF=1: enabled (without JITDUMP)
# PERF=2: enabled (with JITDUMP)
#
# Additional support for jitdump
# JITDUMP=0: disabled (default)
# JITDUMP=1: enabled
# JITDUMP also defaults to 1 if PERF is given and is neither 0 nor 1 (e.g., PERF=2).
#
# Order matters below: JITDUMP is derived from PERF first, then PERF is
# derived from JITDUMP, and only then the remaining defaults are applied.
# All assignments use "?=", i.e., explicitly given values are never overridden.
ifneq (,$(PERF))
ifneq (0,$(PERF))
ifneq (1,$(PERF))
  JITDUMP ?= 1
endif
endif
endif
JITDUMP ?= 0

# requesting JITDUMP implies PERF (unless PERF is given explicitly)
ifneq (0,$(JITDUMP))
  PERF ?= 1
endif

PERF ?= 0
# PERF support turns SYM on by default
ifneq (0,$(PERF))
  SYM ?= 1
endif
167
168# OpenMP is disabled by default and LIBXSMM is
169# always agnostic wrt the threading runtime
170OMP ?= 0
171
172ifneq (,$(MKL))
173ifneq (0,$(MKL))
174  BLAS = $(MKL)
175endif
176endif
177
178ifneq (1,$(CACHE))
179  DFLAGS += -DLIBXSMM_CAPACITY_CACHE=$(CACHE)
180endif
181
182# disable lazy initialization and rely on ctor attribute
183ifeq (0,$(INIT))
184  DFLAGS += -DLIBXSMM_CTOR
185endif
186
187# Kind of documentation (internal key)
188DOCEXT = pdf
189
190# Timeout when downloading documentation parts
191TIMEOUT = 30
192
193# state to be excluded from tracking the (re-)build state
194EXCLUDE_STATE = \
195  DESTDIR PREFIX BINDIR CURDIR DOCDIR DOCEXT INCDIR LICFDIR OUTDIR TSTDIR TIMEOUT \
196  PBINDIR PINCDIR POUTDIR PPKGDIR PMODDIR PSRCDIR PTSTDIR PDOCDIR SCRDIR SPLDIR \
197  SRCDIR TEST VERSION_STRING DEPSTATIC ALIAS_% BLAS %_TARGET %ROOT MPSS KNC
198
199# fixed .state file directory (included by source)
200DIRSTATE = $(OUTDIR)/..
201
202ifeq (,$(M)$(N)$(K))
203ifeq (,$(filter-out 0,$(MNK)))
204  EXCLUDE_STATE += PRECISION MNK M N K
205endif
206endif
207
208# avoid to link with C++ standard library
209FORCE_CXX = 0
210
211# include common Makefile artifacts
212include $(ROOTDIR)/Makefile.inc
213
214# necessary include directories
215IFLAGS += -I$(call quote,$(INCDIR))
216IFLAGS += -I$(call quote,$(ROOTDIR)/$(SRCDIR))
217
218# Version numbers according to interface (version.txt)
219ifneq (,$(PYTHON))
220  VERSION_MAJOR ?= $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py 1)
221  VERSION_MINOR ?= $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py 2)
222  VERSION_UPDATE ?= $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py 3)
223else
224  $(info --------------------------------------------------------------------------------)
225  $(error No Python interpreter found)
226endif
227VERSION_STRING ?= $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_UPDATE)
228VERSION_API ?= $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py 0 $(VERSION_STRING))
229VERSION_RELEASE ?= HEAD
230VERSION_PACKAGE ?= 1
231
232# explicitly target all objects
233ifneq (,$(strip $(SSE)$(AVX)$(MIC)))
234  TGT ?= 1
235endif
236TGT ?= 0
237
238ifeq (0,$(BLAS))
239ifeq (0,$(STATIC))
240ifneq (0,$(LNKSOFT))
241ifeq (Darwin,$(UNAME))
242  LDFLAGS += $(call linkopt,-U,_dgemm_)
243  LDFLAGS += $(call linkopt,-U,_sgemm_)
244  LDFLAGS += $(call linkopt,-U,_dgemv_)
245  LDFLAGS += $(call linkopt,-U,_sgemv_)
246endif
247endif
248endif
249endif
250
251# target library for a broad range of systems
252ifneq (0,$(JIT))
253ifeq (file,$(origin AVX))
254  AVX_STATIC = 0
255endif
256endif
257AVX_STATIC ?= $(AVX)
258
259ifeq (1,$(AVX_STATIC))
260  GENTARGET = snb
261else ifeq (2,$(AVX_STATIC))
262  GENTARGET = hsw
263else ifeq (3,$(AVX_STATIC))
264  ifneq (0,$(MIC))
265    ifeq (2,$(MIC))
266      GENTARGET = knm
267    else
268      GENTARGET = knl
269    endif
270  else
271    GENTARGET = skx
272  endif
273else ifneq (0,$(SSE))
274  GENTARGET = wsm
275else
276  GENTARGET = noarch
277endif
278
# Command prefix to run the GEMM code generator from a recipe (echo is
# suppressed by the leading "@"). OUTDIR is prepended to the library search
# path and to PATH; only the name of the library-path variable depends on the OS.
ifeq (Darwin,$(UNAME)) # osx
  GENGEMM_LIBVAR = DYLD_LIBRARY_PATH
else
  GENGEMM_LIBVAR = LD_LIBRARY_PATH
endif
GENGEMM = @$(ENVBIN) \
  $(GENGEMM_LIBVAR)="$(OUTDIR):$${$(GENGEMM_LIBVAR)}" \
  PATH="$(OUTDIR):$${PATH}" \
  $(BINDIR)/libxsmm_gemm_generator
290
291ifneq (,$(PYTHON))
292  INDICES ?= $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py -1 $(THRESHOLD) $(words $(MNK)) $(MNK) $(words $(M)) $(words $(N)) $(M) $(N) $(K))
293endif
294NINDICES = $(words $(INDICES))
295
296SRCFILES_KERNELS = $(patsubst %,$(BLDDIR)/mm_%.c,$(INDICES))
297KRNOBJS_HST = $(patsubst %,$(BLDDIR)/intel64/mm_%.o,$(INDICES))
298KRNOBJS_MIC = $(patsubst %,$(BLDDIR)/mic/mm_%.o,$(INDICES))
299
300HEADERS = $(wildcard $(ROOTDIR)/$(SRCDIR)/template/*.c) $(wildcard $(ROOTDIR)/$(SRCDIR)/*.h) \
301          $(ROOTDIR)/$(SRCDIR)/libxsmm_hash.c \
302          $(ROOTDIR)/include/libxsmm_blocked_gemm.h \
303          $(ROOTDIR)/include/libxsmm_cpuid.h \
304          $(ROOTDIR)/include/libxsmm_dnn.h \
305          $(ROOTDIR)/include/libxsmm_dnn_tensor.h \
306          $(ROOTDIR)/include/libxsmm_dnn_convolution.h \
307          $(ROOTDIR)/include/libxsmm_dnn_fusedbatchnorm.h \
308          $(ROOTDIR)/include/libxsmm_dnn_fusedgroupnorm.h \
309          $(ROOTDIR)/include/libxsmm_dnn_pooling.h \
310          $(ROOTDIR)/include/libxsmm_dnn_fullyconnected.h \
311          $(ROOTDIR)/include/libxsmm_dnn_rnncell.h \
312          $(ROOTDIR)/include/libxsmm_dnn_softmaxloss.h \
313          $(ROOTDIR)/include/libxsmm_dnn_optimizer.h \
314          $(ROOTDIR)/include/libxsmm_rng.h \
315          $(ROOTDIR)/include/libxsmm_frontend.h \
316          $(ROOTDIR)/include/libxsmm_fsspmdm.h \
317          $(ROOTDIR)/include/libxsmm_generator.h \
318          $(ROOTDIR)/include/libxsmm_intrinsics_x86.h \
319          $(ROOTDIR)/include/libxsmm_macros.h \
320          $(ROOTDIR)/include/libxsmm_malloc.h \
321          $(ROOTDIR)/include/libxsmm_math.h \
322          $(ROOTDIR)/include/libxsmm_memory.h \
323          $(ROOTDIR)/include/libxsmm_mhd.h \
324          $(ROOTDIR)/include/libxsmm_spmdm.h \
325          $(ROOTDIR)/include/libxsmm_sync.h \
326          $(ROOTDIR)/include/libxsmm_timer.h \
327          $(ROOTDIR)/include/libxsmm_typedefs.h
328SRCFILES_LIB = $(patsubst %,$(ROOTDIR)/$(SRCDIR)/%, \
329          libxsmm_main.c libxsmm_memory.c libxsmm_malloc.c libxsmm_hash.c libxsmm_math.c \
330          libxsmm_sync.c libxsmm_python.c libxsmm_mhd.c libxsmm_timer.c libxsmm_perf.c \
331          libxsmm_gemm.c libxsmm_xcopy.c libxsmm_blocked_gemm.c libxsmm_spmdm.c libxsmm_fsspmdm.c libxsmm_rng.c\
332          libxsmm_dnn.c libxsmm_dnn_tensor.c libxsmm_dnn_convolution.c  libxsmm_dnn_elementwise.c \
333          libxsmm_dnn_rnncell.c libxsmm_dnn_rnncell_forward.c libxsmm_dnn_rnncell_backward_weight_update.c \
334          libxsmm_dnn_fusedbatchnorm.c libxsmm_dnn_fusedbatchnorm_forward.c libxsmm_dnn_fusedbatchnorm_backward.c \
335          libxsmm_dnn_fusedgroupnorm.c libxsmm_dnn_fusedgroupnorm_forward.c libxsmm_dnn_fusedgroupnorm_backward.c \
336          libxsmm_dnn_pooling.c libxsmm_dnn_pooling_forward.c libxsmm_dnn_pooling_backward.c libxsmm_dnn_convolution_forward.c \
337          libxsmm_dnn_fullyconnected.c libxsmm_dnn_fullyconnected_forward.c libxsmm_dnn_fullyconnected_backward_weight_update.c \
338          libxsmm_dnn_convolution_backward.c libxsmm_dnn_convolution_weight_update.c libxsmm_dnn_softmaxloss.c \
339          libxsmm_dnn_softmaxloss_forward.c libxsmm_dnn_softmaxloss_backward.c libxsmm_dnn_optimizer.c libxsmm_dnn_optimizer_sgd.c )
340SRCFILES_GEN_LIB = $(patsubst %,$(ROOTDIR)/$(SRCDIR)/%,$(notdir $(wildcard $(ROOTDIR)/$(SRCDIR)/generator_*.c)) \
341          libxsmm_cpuid_x86.c libxsmm_generator.c libxsmm_trace.c)
342
343SRCFILES_GEN_GEMM_BIN = $(patsubst %,$(ROOTDIR)/$(SRCDIR)/%,libxsmm_generator_gemm_driver.c)
344OBJFILES_GEN_GEMM_BIN = $(patsubst %,$(BLDDIR)/intel64/%.o,$(basename $(notdir $(SRCFILES_GEN_GEMM_BIN))))
345OBJFILES_GEN_LIB = $(patsubst %,$(BLDDIR)/intel64/%.o,$(basename $(notdir $(SRCFILES_GEN_LIB))))
346OBJFILES_HST = $(patsubst %,$(BLDDIR)/intel64/%.o,$(basename $(notdir $(SRCFILES_LIB))))
347OBJFILES_MIC = $(patsubst %,$(BLDDIR)/mic/%.o,$(basename $(notdir $(SRCFILES_LIB)))) $(BLDDIR)/mic/generator_common.o
348EXTOBJS_HST  = $(BLDDIR)/intel64/libxsmm_ext.o \
349               $(BLDDIR)/intel64/libxsmm_ext_xcopy.o \
350               $(BLDDIR)/intel64/libxsmm_ext_blocked_gemm.o \
351               $(BLDDIR)/intel64/libxsmm_ext_gemm.o
352EXTOBJS_MIC  = $(BLDDIR)/mic/libxsmm_ext.o \
353               $(BLDDIR)/mic/libxsmm_ext_xcopy.o \
354               $(BLDDIR)/mic/libxsmm_ext_blocked_gemm.o \
355               $(BLDDIR)/mic/libxsmm_ext_gemm.o
356NOBLAS_HST   = $(BLDDIR)/intel64/libxsmm_noblas.o
357NOBLAS_MIC   = $(BLDDIR)/mic/libxsmm_noblas.o
358
359# list of object might be "incomplete" if not all code gen. FLAGS are supplied with clean target!
360OBJECTS = $(OBJFILES_GEN_LIB) $(OBJFILES_GEN_GEMM_BIN) $(OBJFILES_GEN_CONV_BIN) $(OBJFILES_HST) $(OBJFILES_MIC) \
361          $(KRNOBJS_HST) $(KRNOBJS_MIC) $(EXTOBJS_HST) $(EXTOBJS_MIC) $(NOBLAS_HST) $(NOBLAS_MIC)
362ifneq (,$(strip $(FC)))
363  FTNOBJS = $(BLDDIR)/intel64/libxsmm-mod.o $(BLDDIR)/mic/libxsmm-mod.o
364endif
365
366MSGJITPROFILING = 0
367ifneq (0,$(JIT))
368ifneq (0,$(VTUNE))
369ifeq (,$(filter Darwin,$(UNAME)))
370  ifneq (0,$(PERF))
371    DFLAGS += -DLIBXSMM_PERF
372    ifneq (0,$(JITDUMP))
373      DFLAGS += -DLIBXSMM_PERF_JITDUMP
374    endif
375  endif
376  VTUNEROOT = $(shell env | grep VTUNE_PROFILER | grep -m1 _DIR | cut -d= -f2-)
377  ifeq (,$(VTUNEROOT))
378    VTUNEROOT = $(shell env | grep VTUNE_AMPLIFIER | grep -m1 _DIR | cut -d= -f2-)
379  endif
380  ifeq (,$(VTUNEROOT))
381    VTUNEROOT = $(EBROOTVTUNE)/vtune_amplifier
382  endif
383  ifneq (,$(wildcard $(VTUNEROOT)/lib64/libjitprofiling.$(SLIBEXT)))
384    ifneq (0,$(SYM))
385      LIBJITPROFILING = $(BLDDIR)/jitprofiling/libjitprofiling.$(SLIBEXT)
386      OBJJITPROFILING = $(BLDDIR)/jitprofiling/*.o
387      DFLAGS += -DLIBXSMM_VTUNE
388      IFLAGS += -I$(call quote,$(VTUNEROOT)/include)
389      ifneq (0,$(INTEL))
390        CXXFLAGS += -diag-disable 271
391        CFLAGS += -diag-disable 271
392      endif
393    endif
394    MSGJITPROFILING = 1
395  endif
396endif
397endif
398endif
399
400ifneq (,$(PYTHON))
401information = \
402	$(info ================================================================================) \
403	$(info LIBXSMM $(shell $(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py) ($(UNAME))) \
404	$(info --------------------------------------------------------------------------------) \
405	$(info $(GINFO)) \
406	$(info $(CINFO)) \
407	$(if $(strip $(FC)),$(info $(FINFO)),$(NULL)) \
408	$(if $(strip $(FC)),$(NULL), \
409	$(if $(strip $(FC_VERSION)), \
410	$(info Fortran Compiler $(FC_VERSION) is outdated!), \
411	$(info Fortran Compiler is disabled or missing: no Fortran interface is built!))) \
412	$(info --------------------------------------------------------------------------------)
413endif
414
415ifneq (,$(strip $(TEST)))
416.PHONY: run-tests
417run-tests: tests
418endif
419
420.PHONY: libxsmm
421ifeq (0,$(COMPATIBLE))
422ifeq (0,$(SHARED))
423libxsmm: lib generator
424else
425libxsmm: libs generator
426endif
427else
428ifeq (0,$(SHARED))
429libxsmm: lib
430else
431libxsmm: libs
432endif
433endif
434	$(information)
435ifneq (,$(filter _0_,_$(LNKSOFT)_))
436ifeq (0,$(DEPSTATIC))
437	$(info Building a shared library requires to link against BLAS)
438	$(info since a deferred choice is not implemented for this OS.)
439	$(info --------------------------------------------------------------------------------)
440endif
441endif
442ifneq (,$(filter _0_,_$(BLAS)_))
443ifeq (,$(filter _0_,_$(NOBLAS)_))
444	$(info BLAS dependency and fallback is removed!)
445	$(info --------------------------------------------------------------------------------)
446endif
447else ifeq (, $(filter _0_,_$(LNKSOFT)_))
448	$(info LIBXSMM is link-time agnostic with respect to a BLAS library!)
449	$(info Forcing a specific library can take away a user's choice.)
450	$(info If this was to solve linker errors (dgemm_, sgemm_, etc.),)
451	$(info the BLAS library should go after LIBXSMM (link-line).)
452	$(info --------------------------------------------------------------------------------)
453endif
454ifneq (2,$(INTRINSICS))
455ifeq (0,$(COMPATIBLE))
456ifeq (0,$(AVX))
457	$(info INTRINSICS=$(INTRINSICS) without setting AVX can reduce performance of certain code paths.)
458else
459	$(info INTRINSICS=$(INTRINSICS) limits LIBXSMM to AVX$(AVX) (and beyond).)
460endif
461ifeq (0,$(INTEL))
462	$(info If adjusting INTRINSICS was necessary, reconsider an updated tool chain.)
463else # Intel Compiler
464	$(info Intel Compiler does not require adjusting INTRINSICS.)
465endif
466	$(info --------------------------------------------------------------------------------)
467endif
468endif
469ifneq (0,$(MSGJITPROFILING))
470ifneq (,$(strip $(LIBJITPROFILING)))
471	$(info Intel VTune Amplifier support has been incorporated.)
472else
473	$(info Intel VTune Amplifier support has been detected (enable with SYM=1).)
474endif
475	$(info --------------------------------------------------------------------------------)
476endif
477
478.PHONY: lib
479lib: headers drytest lib_hst lib_mic
480
481.PHONY: libs
482libs: lib
483ifneq (0,$(STATIC))
484	@$(MAKE) --no-print-directory lib STATIC=0
485else
486	@$(MAKE) --no-print-directory lib STATIC=1
487endif
488
489.PHONY: all
490all: libxsmm
491
492.PHONY: realall
493realall: all samples
494
495.PHONY: headers
496headers: cheader cheader_only fheader
497
498.PHONY: header-only
499header-only: cheader_only
500
501.PHONY: header_only
502header_only: header-only
503
504.PHONY: interface
505interface: headers module
506
507.PHONY: winterface
508winterface: headers sources
509
510.PHONY: lib_mic
511lib_mic: clib_mic flib_mic ext_mic noblas_mic
512
513.PHONY: lib_hst
514lib_hst: clib_hst flib_hst ext_hst noblas_hst
515
# Translate the user-facing PREFETCH option into
#   PREFETCH_UID:    internal enumerator (0...7),
#   PREFETCH_SCHEME: strategy name passed to the code generator,
#   PREFETCH_TYPE:   value passed to the config/interface scripts
#                    (libxsmm_gemm_prefetch_type, see include/libxsmm_typedefs.h).
# PREFETCH can be an enumerator (0...7), a negative number (auto-select),
# or the exact strategy name; any other value falls back to "nopf" (0).
# The value is classified using $(filter) instead of shell arithmetic: inside
# $$((...)) a strategy name is taken as the name of an (unset) shell variable,
# which made the numeric branch match and the lookup by name unreachable.
PREFETCH_UID = 0
PREFETCH_TYPE = 0
PREFETCH_SCHEME = nopf
ifneq (Windows_NT,$(UNAME)) # TODO: full support for Windows calling convention
  ifneq (,$(filter 0 1 2 3 4 5 6 7,$(PREFETCH)))
    PREFETCH_UID = $(PREFETCH)
  else ifneq (,$(filter -%,$(PREFETCH))) # negative: auto
    PREFETCH_UID = 1
  else ifeq (pfsigonly,$(PREFETCH))
    PREFETCH_UID = 2
  else ifeq (BL2viaC,$(PREFETCH))
    PREFETCH_UID = 3
  else ifeq (curAL2,$(PREFETCH))
    PREFETCH_UID = 4
  else ifeq (curAL2_BL2viaC,$(PREFETCH))
    PREFETCH_UID = 5
  else ifeq (AL2,$(PREFETCH))
    PREFETCH_UID = 6
  else ifeq (AL2_BL2viaC,$(PREFETCH))
    PREFETCH_UID = 7
  endif
  # Mapping build options to libxsmm_gemm_prefetch_type (see include/libxsmm_typedefs.h)
  ifeq (1,$(PREFETCH_UID))
    # Prefetch "auto" is a pseudo-strategy introduced by the frontend;
    # select "nopf" for statically generated code.
    PREFETCH_SCHEME = nopf
    PREFETCH_TYPE = -1
  else ifeq (2,$(PREFETCH_UID))
    PREFETCH_SCHEME = pfsigonly
    PREFETCH_TYPE = 1
  else ifeq (3,$(PREFETCH_UID))
    PREFETCH_SCHEME = BL2viaC
    PREFETCH_TYPE = 4
  else ifeq (4,$(PREFETCH_UID))
    PREFETCH_SCHEME = curAL2
    PREFETCH_TYPE = 8
  else ifeq (5,$(PREFETCH_UID))
    PREFETCH_SCHEME = curAL2_BL2viaC
    # 4 | 8 (BL2viaC combined with curAL2)
    PREFETCH_TYPE = 12
  else ifeq (6,$(PREFETCH_UID))
    PREFETCH_SCHEME = AL2
    PREFETCH_TYPE = 2
  else ifeq (7,$(PREFETCH_UID))
    PREFETCH_SCHEME = AL2_BL2viaC
    # 4 | 2 (BL2viaC combined with AL2)
    PREFETCH_TYPE = 6
  endif
endif
ifeq (,$(PREFETCH_SCHEME_MIC)) # adopt host scheme
  PREFETCH_SCHEME_MIC = $(PREFETCH_SCHEME)
endif
566
567# Mapping build options to libxsmm_gemm_flags (see include/libxsmm_typedefs.h)
568#FLAGS = $(shell echo $$((((0==$(ALPHA))*4) | ((0>$(ALPHA))*8) | ((0==$(BETA))*16) | ((0>$(BETA))*32))))
569FLAGS = 0
570
571SUPPRESS_UNUSED_VARIABLE_WARNINGS = LIBXSMM_UNUSED(A); LIBXSMM_UNUSED(B); LIBXSMM_UNUSED(C);
572ifneq (nopf,$(PREFETCH_SCHEME))
573  #SUPPRESS_UNUSED_VARIABLE_WARNINGS += LIBXSMM_UNUSED(A_prefetch); LIBXSMM_UNUSED(B_prefetch);
574  #SUPPRESS_UNUSED_PREFETCH_WARNINGS = $(NULL)  LIBXSMM_UNUSED(C_prefetch);~
575  SUPPRESS_UNUSED_PREFETCH_WARNINGS = $(NULL)  LIBXSMM_UNUSED(A_prefetch); LIBXSMM_UNUSED(B_prefetch); LIBXSMM_UNUSED(C_prefetch);~
576endif
577
578# auto-clean the co-build
579$(ROOTDIR)/$(SRCDIR)/template/libxsmm_config.h: $(ROOTDIR)/$(SCRDIR)/libxsmm_config.py $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py \
580                                                $(ROOTDIR)/Makefile $(ROOTDIR)/Makefile.inc $(wildcard $(ROOTDIR)/.github/*) \
581                                                $(ROOTDIR)/version.txt
582#ifneq (,$(filter-out 0 1 2 STATIC,$(words $(PRESTATE)) $(word 2,$(PRESTATE))))
583ifneq (0,$(STATIC)) # static
584	@rm -f $(OUTDIR)/libxsmm*.$(DLIBEXT) $(OUTDIR)/libxsmm*.$(DLIBEXT).*
585else # shared/dynamic
586	@rm -f $(OUTDIR)/libxsmm*.$(SLIBEXT) $(OUTDIR)/libxsmm*.$(SLIBEXT).*
587endif
588	@touch $@
589#endif
590
591.PHONY: config
592config: $(INCDIR)/libxsmm_config.h $(INCDIR)/libxsmm_version.h
593$(INCDIR)/libxsmm_config.h: $(INCDIR)/.make $(ROOTDIR)/$(SRCDIR)/template/libxsmm_config.h $(DIRSTATE)/.state
594	$(information)
595	$(info --- LIBXSMM build log)
596	@if [ -e $(ROOTDIR)/.github/install.sh ]; then \
597		$(ROOTDIR)/.github/install.sh; \
598	fi
599	@$(CP) $(filter $(ROOTDIR)/include/%.h,$(HEADERS)) $(INCDIR) 2>/dev/null || true
600ifneq (,$(PYTHON))
601	@$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_config.py $(ROOTDIR)/$(SRCDIR)/template/libxsmm_config.h \
602		$(MAKE_ILP64) $(OFFLOAD) $(CACHELINE) $(PRECISION) $(PREFETCH_TYPE) \
603		$(shell echo $$((0<$(THRESHOLD)?$(THRESHOLD):0))) \
604		$(shell echo $$(($(THREADS)+$(OMP)))) \
605		$(JIT) $(FLAGS) $(ALPHA) $(BETA) $(WRAP) $(MALLOC) $(INDICES) > $@
606endif
607$(INCDIR)/libxsmm_version.h: $(ROOTDIR)/$(SRCDIR)/template/libxsmm_config.h $(INCDIR)/.make \
608                             $(ROOTDIR)/$(SRCDIR)/template/libxsmm_version.h
609ifneq (,$(PYTHON))
610	@$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_config.py $(ROOTDIR)/$(SRCDIR)/template/libxsmm_version.h > $@
611else
612.PHONY: $(INCDIR)/libxsmm_version.h
613endif
614
615
616.PHONY: cheader
617cheader: $(INCDIR)/libxsmm.h
618ifneq (,$(PYTHON))
619$(INCDIR)/libxsmm.h: $(ROOTDIR)/$(SCRDIR)/libxsmm_interface.py \
620                     $(ROOTDIR)/$(SRCDIR)/template/libxsmm.h \
621                     $(INCDIR)/libxsmm_version.h \
622                     $(INCDIR)/libxsmm_config.h \
623                     $(HEADERS)
624	@$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_interface.py $(ROOTDIR)/$(SRCDIR)/template/libxsmm.h \
625		$(PRECISION) $(PREFETCH_TYPE) $(INDICES) > $@
626else
627.PHONY: $(INCDIR)/libxsmm.h
628endif
629
630.PHONY: cheader_only
631cheader_only: $(INCDIR)/libxsmm_source.h
632$(INCDIR)/libxsmm_source.h: $(INCDIR)/.make $(ROOTDIR)/$(SCRDIR)/libxsmm_source.sh $(INCDIR)/libxsmm.h
633	@$(ROOTDIR)/$(SCRDIR)/libxsmm_source.sh > $@
634
635.PHONY: fheader
636fheader: $(INCDIR)/libxsmm.f
637ifneq (,$(PYTHON))
638$(INCDIR)/libxsmm.f: $(ROOTDIR)/$(SCRDIR)/libxsmm_interface.py \
639                     $(ROOTDIR)/$(SCRDIR)/libxsmm_config.py \
640                     $(ROOTDIR)/$(SRCDIR)/template/libxsmm.f \
641                     $(INCDIR)/libxsmm_version.h \
642                     $(INCDIR)/libxsmm_config.h
643	@$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_interface.py $(ROOTDIR)/$(SRCDIR)/template/libxsmm.f \
644		$(PRECISION) $(PREFETCH_TYPE) $(INDICES) | \
645	$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_config.py /dev/stdin \
646		$(MAKE_ILP64) $(OFFLOAD) $(CACHELINE) $(PRECISION) $(PREFETCH_TYPE) \
647		$(shell echo $$((0<$(THRESHOLD)?$(THRESHOLD):0))) \
648		$(shell echo $$(($(THREADS)+$(OMP)))) \
649		$(JIT) $(FLAGS) $(ALPHA) $(BETA) $(WRAP) $(MALLOC) $(INDICES) | \
650	sed "/ATTRIBUTES OFFLOAD:MIC/d" > $@
651else
652.PHONY: $(INCDIR)/libxsmm.f
653endif
654
655.PHONY: sources
656sources: $(SRCFILES_KERNELS) $(BLDDIR)/libxsmm_dispatch.h
657ifneq (,$(PYTHON))
658$(BLDDIR)/libxsmm_dispatch.h: $(BLDDIR)/.make $(SRCFILES_KERNELS) $(ROOTDIR)/$(SCRDIR)/libxsmm_dispatch.py $(DIRSTATE)/.state
659	@$(PYTHON) $(call quote,$(ROOTDIR)/$(SCRDIR)/libxsmm_dispatch.py) $(call qapath,$(DIRSTATE)/.state) $(PRECISION) $(THRESHOLD) $(INDICES) > $@
660else
661.PHONY: $(BLDDIR)/libxsmm_dispatch.h
662endif
663
# Generates the source file of a statically specialized kernel.
# The values M, N, and K are taken from the 2nd, 3rd, and 4th field of the
# underscore-separated basename of the target (e.g., mm_<M>_<N>_<K>.c).
# Steps: (1) start the file with the libxsmm.h include and the GENTARGET
# defines, (2) let the generator append SP and/or DP kernels (according to
# PRECISION), (3) post-process the file with sed into a temporary file,
# (4) append the output of libxsmm_specialized.py, and (5) move the
# temporary file over the target.
# The recipe is empty if no kernels are requested (SRCFILES_KERNELS is empty).
$(BLDDIR)/%.c: $(BLDDIR)/.make $(INCDIR)/libxsmm.h $(BINDIR)/libxsmm_gemm_generator $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py $(ROOTDIR)/$(SCRDIR)/libxsmm_specialized.py
ifneq (,$(strip $(SRCFILES_KERNELS)))
	$(eval MVALUE := $(shell echo $(basename $(notdir $@)) | cut -d_ -f2))
	$(eval NVALUE := $(shell echo $(basename $(notdir $@)) | cut -d_ -f3))
	$(eval KVALUE := $(shell echo $(basename $(notdir $@)) | cut -d_ -f4))
	$(eval MNVALUE := $(MVALUE))
	$(eval NMVALUE := $(NVALUE))
	@echo "#include <libxsmm.h>" > $@
	@echo >> $@
# noarch: kernels for knl, hsw, snb, and wsm are generated into the same file,
# but only if CTARGET is non-empty (otherwise no kernel is generated at all).
ifeq (noarch,$(GENTARGET))
ifneq (,$(CTARGET))
ifneq (2,$(PRECISION))
	@echo "#define LIBXSMM_GENTARGET_knl_sp" >> $@
	@echo "#define LIBXSMM_GENTARGET_hsw_sp" >> $@
	@echo "#define LIBXSMM_GENTARGET_snb_sp" >> $@
	@echo "#define LIBXSMM_GENTARGET_wsm_sp" >> $@
endif
ifneq (1,$(PRECISION))
	@echo "#define LIBXSMM_GENTARGET_knl_dp" >> $@
	@echo "#define LIBXSMM_GENTARGET_hsw_dp" >> $@
	@echo "#define LIBXSMM_GENTARGET_snb_dp" >> $@
	@echo "#define LIBXSMM_GENTARGET_wsm_dp" >> $@
endif
	@echo >> $@
	@echo >> $@
# generator arguments after the routine name: M N K, followed by M K M
# (presumably lda, ldb, and ldc -- TODO confirm against the generator driver)
ifneq (2,$(PRECISION))
	$(GENGEMM) dense $@ libxsmm_s$(basename $(notdir $@))_knl $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 knl $(PREFETCH_SCHEME) SP
	$(GENGEMM) dense $@ libxsmm_s$(basename $(notdir $@))_hsw $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 hsw $(PREFETCH_SCHEME) SP
	$(GENGEMM) dense $@ libxsmm_s$(basename $(notdir $@))_snb $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 snb $(PREFETCH_SCHEME) SP
	$(GENGEMM) dense $@ libxsmm_s$(basename $(notdir $@))_wsm $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 wsm $(PREFETCH_SCHEME) SP
endif
ifneq (1,$(PRECISION))
	$(GENGEMM) dense $@ libxsmm_d$(basename $(notdir $@))_knl $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 knl $(PREFETCH_SCHEME) DP
	$(GENGEMM) dense $@ libxsmm_d$(basename $(notdir $@))_hsw $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 hsw $(PREFETCH_SCHEME) DP
	$(GENGEMM) dense $@ libxsmm_d$(basename $(notdir $@))_snb $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 snb $(PREFETCH_SCHEME) DP
	$(GENGEMM) dense $@ libxsmm_d$(basename $(notdir $@))_wsm $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 wsm $(PREFETCH_SCHEME) DP
endif
endif # target
else # noarch
ifneq (2,$(PRECISION))
	@echo "#define LIBXSMM_GENTARGET_$(GENTARGET)_sp" >> $@
endif
ifneq (1,$(PRECISION))
	@echo "#define LIBXSMM_GENTARGET_$(GENTARGET)_dp" >> $@
endif
	@echo >> $@
	@echo >> $@
ifneq (2,$(PRECISION))
	$(GENGEMM) dense $@ libxsmm_s$(basename $(notdir $@))_$(GENTARGET) $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 $(GENTARGET) $(PREFETCH_SCHEME) SP
endif
ifneq (1,$(PRECISION))
	$(GENGEMM) dense $@ libxsmm_d$(basename $(notdir $@))_$(GENTARGET) $(MNVALUE) $(NMVALUE) $(KVALUE) $(MNVALUE) $(KVALUE) $(MNVALUE) $(ALPHA) $(BETA) 0 0 $(GENTARGET) $(PREFETCH_SCHEME) DP
endif
endif # noarch
# Post-processing: the "~" characters carried by the substituted text
# (see SUPPRESS_UNUSED_PREFETCH_WARNINGS) are turned into line breaks by "tr".
	$(eval TMPFILE = $(shell $(MKTEMP) /tmp/.libxsmm_XXXXXX.mak))
	@cat $@ | sed \
		-e "s/void libxsmm_/LIBXSMM_INLINE LIBXSMM_RETARGETABLE void libxsmm_/" \
		-e "s/#ifndef NDEBUG/$(SUPPRESS_UNUSED_PREFETCH_WARNINGS)#ifdef LIBXSMM_NEVER_DEFINED/" \
		-e "s/#pragma message (\".*KERNEL COMPILATION ERROR in: \" __FILE__)/  $(SUPPRESS_UNUSED_VARIABLE_WARNINGS)/" \
		-e "/#error No kernel was compiled, lacking support for current architecture?/d" \
		-e "/#pragma message (\".*KERNEL COMPILATION WARNING: compiling ..* code on ..* or newer architecture: \" __FILE__)/d" \
		| tr "~" "\n" > $(TMPFILE)
ifneq (,$(PYTHON))
	@$(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_specialized.py $(PRECISION) $(MVALUE) $(NVALUE) $(KVALUE) $(PREFETCH_TYPE) >> $(TMPFILE)
endif
	@$(MV) $(TMPFILE) $@
endif
731
# Template of an explicit compile rule; instantiate using $(eval $(call ...)).
#   $(1): object file (target of the rule)
#   $(2): source file to be compiled
#   $(3): additional prerequisites (e.g., generated headers)
#   $(4): flags passed to the compiler
# The rule also depends on the ".make" file inside of the object's directory.
# A failing compiler invocation is tolerated at first ("-" prefix); the
# subsequent check prints a hint and fails the recipe if no object exists
# (the object is removed upfront, hence a stale file cannot hide the failure).
define DEFINE_COMPILE_RULE
$(1): $(2) $(3) $(dir $(1))/.make
	@rm -f $(1)
	-$(CC) $(4) -c $(2) -o $(1)
	@if ! [ -e $(1) ]; then \
		echo "--------------------------------------------------------------"; \
		echo "In case of assembler error, perhaps the Binutils are outdated."; \
		echo "See https://github.com/hfp/libxsmm#outdated-binutils"; \
		echo "--------------------------------------------------------------"; \
		false; \
	fi
endef
744
# LIBXSMM_BUILD is defined as 2 unless GLIBC is 0 (then it is defined as 1)
ifneq (0,$(GLIBC))
  DFLAGS += -DLIBXSMM_BUILD=2
else
  DFLAGS += -DLIBXSMM_BUILD=1
endif

# Flags for the objects of the extension library (EXTOBJS).
# If OMP is 0 and was not given by the environment, the command line, or an
# override directive (see $(origin OMP)), the OpenMP compiler/linker flags are
# still added for the extension objects. If OMP is non-zero,
# LIBXSMM_SYNC_OMP is defined for all objects instead.
EXTCFLAGS = -DLIBXSMM_BUILD_EXT
ifeq (0,$(OMP))
  ifeq (,$(filter environment% override command%,$(origin OMP)))
    EXTCFLAGS += $(OMPFLAG)
    EXTLDFLAGS += $(OMPLIB)
  endif
else # OpenMP
  DFLAGS += -DLIBXSMM_SYNC_OMP
endif
760
761ifneq (0,$(MIC))
762ifneq (0,$(MPSS))
763$(foreach OBJ,$(OBJFILES_MIC),$(eval $(call DEFINE_COMPILE_RULE, \
764  $(OBJ), $(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
765  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h $(BLDDIR)/libxsmm_dispatch.h,-mmic \
766  $(DFLAGS) $(IFLAGS) $(call applyif,1,libxsmm_main,$(OBJ),-I$(BLDDIR)) $(CFLAGS))))
767$(foreach OBJ,$(KRNOBJS_MIC),$(eval $(call DEFINE_COMPILE_RULE, \
768  $(OBJ), $(patsubst %.o,$(BLDDIR)/%.c,$(notdir $(OBJ))), \
769  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h,-mmic \
770  $(DFLAGS) $(IFLAGS) $(CFLAGS))))
771$(foreach OBJ,$(EXTOBJS_MIC),$(eval $(call DEFINE_COMPILE_RULE, \
772  $(OBJ), $(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
773  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h,-mmic \
774  $(DFLAGS) $(IFLAGS) $(EXTCFLAGS) $(CFLAGS))))
775$(eval $(call DEFINE_COMPILE_RULE,$(NOBLAS_MIC),$(ROOTDIR)/$(SRCDIR)/libxsmm_ext.c,$(INCDIR)/libxsmm.h,-mmic \
776  $(NOBLAS_CFLAGS) $(NOBLAS_FLAGS) $(NOBLAS_IFLAGS) $(DNOBLAS)))
777endif
778endif
779
780# build rules that include target flags
781$(eval $(call DEFINE_COMPILE_RULE,$(NOBLAS_HST),$(ROOTDIR)/$(SRCDIR)/libxsmm_ext.c,$(INCDIR)/libxsmm.h, \
782  $(CTARGET) $(NOBLAS_CFLAGS) $(NOBLAS_FLAGS) $(NOBLAS_IFLAGS) $(DNOBLAS)))
783$(foreach OBJ,$(OBJFILES_HST),$(eval $(call DEFINE_COMPILE_RULE, \
784  $(OBJ),$(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
785  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h $(BLDDIR)/libxsmm_dispatch.h, \
786  $(DFLAGS) $(IFLAGS) $(call applyif,1,libxsmm_main,$(OBJ),-I$(BLDDIR)) $(CTARGET) $(CFLAGS))))
787$(foreach OBJ,$(KRNOBJS_HST),$(eval $(call DEFINE_COMPILE_RULE, \
788  $(OBJ),$(patsubst %.o,$(BLDDIR)/%.c,$(notdir $(OBJ))), \
789  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h, \
790  $(DFLAGS) $(IFLAGS) $(CTARGET) $(CFLAGS))))
791$(foreach OBJ,$(EXTOBJS_HST),$(eval $(call DEFINE_COMPILE_RULE, \
792  $(OBJ),$(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
793  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h, \
794  $(DFLAGS) $(IFLAGS) $(CTARGET) $(EXTCFLAGS) $(CFLAGS))))
795
# build rules that by default include no target flags
# TGT_FLAGS only defaults to CTARGET if TGT is different from 0 (and if
# TGT_FLAGS was not set before); otherwise it keeps its prior (possibly
# empty) value.
ifneq (0,$(TGT))
  TGT_FLAGS ?= $(CTARGET)
endif
# One compile rule per object of OBJFILES_GEN_LIB and OBJFILES_GEN_GEMM_BIN;
# sources are the objects' basenames (.c) below $(ROOTDIR)/$(SRCDIR).
# DEFINE_COMPILE_RULE is defined outside this section.
$(foreach OBJ,$(OBJFILES_GEN_LIB),$(eval $(call DEFINE_COMPILE_RULE, \
  $(OBJ),$(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h, \
  $(DFLAGS) $(IFLAGS) $(TGT_FLAGS) $(CFLAGS))))
$(foreach OBJ,$(OBJFILES_GEN_GEMM_BIN),$(eval $(call DEFINE_COMPILE_RULE, \
  $(OBJ),$(patsubst %.o,$(ROOTDIR)/$(SRCDIR)/%.c,$(notdir $(OBJ))), \
  $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h, \
  $(DFLAGS) $(IFLAGS) $(TGT_FLAGS) $(CFLAGS))))
808
# compile_mic is always declared phony; the rule (no prerequisites, no recipe)
# and the pattern rule only exist if both MIC and MPSS are different from 0.
.PHONY: compile_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
compile_mic:
# Pattern rule: compiles $(BLDDIR)/<stem>.c into $(BLDDIR)/mic/<stem>.o using
# -mmic; the .make file and the listed headers are prerequisites.
$(BLDDIR)/mic/%.o: $(BLDDIR)/%.c $(BLDDIR)/mic/.make $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h $(BLDDIR)/libxsmm_dispatch.h
	$(CC) $(DFLAGS) $(IFLAGS) $(CFLAGS) -mmic -c $< -o $@
endif
endif
817
# compile_hst is a phony target without prerequisites or recipe.
.PHONY: compile_hst
compile_hst:
# Pattern rule: compiles $(BLDDIR)/<stem>.c into $(BLDDIR)/intel64/<stem>.o
# using the host target flags (CTARGET).
$(BLDDIR)/intel64/%.o: $(BLDDIR)/%.c $(BLDDIR)/intel64/.make $(INCDIR)/libxsmm.h $(INCDIR)/libxsmm_source.h $(BLDDIR)/libxsmm_dispatch.h
	$(CC) $(DFLAGS) $(IFLAGS) $(CFLAGS) $(CTARGET) -c $< -o $@
822
# module_mic: Fortran module for the MIC target. The rules only exist if MIC
# and MPSS are different from 0 and a Fortran compiler (FC) is set; otherwise
# the object and the module file are merely declared phony.
.PHONY: module_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
ifneq (,$(strip $(FC)))
module_mic: $(INCDIR)/mic/libxsmm.mod
# The compiler is asked (FMFLAGS) to emit the module into $(INCDIR)/mic.
$(BLDDIR)/mic/libxsmm-mod.o: $(BLDDIR)/mic/.make $(INCDIR)/mic/.make $(INCDIR)/libxsmm.f
	$(FC) $(DFLAGS) $(IFLAGS) $(FCMTFLAGS) $(FCFLAGS) -mmic -c $(INCDIR)/libxsmm.f -o $@ $(FMFLAGS) $(INCDIR)/mic
# Fallback for compilers which drop the module next to the object file or into
# the working directory: collect it in $(INCDIR)/mic, i.e., the directory of
# this rule's target. Collecting into $(INCDIR) would overwrite the host module.
# The final touch guarantees that the target exists and is up to date.
$(INCDIR)/mic/libxsmm.mod: $(BLDDIR)/mic/libxsmm-mod.o
	@if [ -e $(BLDDIR)/mic/LIBXSMM.mod ]; then $(CP) $(BLDDIR)/mic/LIBXSMM.mod $(INCDIR)/mic; fi
	@if [ -e $(BLDDIR)/mic/libxsmm.mod ]; then $(CP) $(BLDDIR)/mic/libxsmm.mod $(INCDIR)/mic; fi
	@if [ -e LIBXSMM.mod ]; then $(MV) LIBXSMM.mod $(INCDIR)/mic; fi
	@if [ -e libxsmm.mod ]; then $(MV) libxsmm.mod $(INCDIR)/mic; fi
	@touch $@
else
.PHONY: $(BLDDIR)/mic/libxsmm-mod.o
.PHONY: $(INCDIR)/mic/libxsmm.mod
endif
else
.PHONY: $(BLDDIR)/mic/libxsmm-mod.o
.PHONY: $(INCDIR)/mic/libxsmm.mod
endif
else
.PHONY: $(BLDDIR)/mic/libxsmm-mod.o
.PHONY: $(INCDIR)/mic/libxsmm.mod
endif
848
# module_hst: Fortran module for the host. Without a Fortran compiler (FC),
# the object and the module file are only declared phony.
.PHONY: module_hst
ifneq (,$(strip $(FC)))
module_hst: $(INCDIR)/libxsmm.mod
# Compile the Fortran interface; FMFLAGS directs the module into $(INCDIR).
$(BLDDIR)/intel64/libxsmm-mod.o: $(BLDDIR)/intel64/.make $(INCDIR)/libxsmm.f
	$(FC) $(DFLAGS) $(IFLAGS) $(FCMTFLAGS) $(FCFLAGS) $(FTARGET) -c $(INCDIR)/libxsmm.f -o $@ $(FMFLAGS) $(INCDIR)
# Collect a module emitted next to the object (copy) or into the working
# directory (move); each guard is a no-op if the file does not exist.
$(INCDIR)/libxsmm.mod: $(BLDDIR)/intel64/libxsmm-mod.o
	@[ ! -e $(BLDDIR)/intel64/LIBXSMM.mod ] || $(CP) $(BLDDIR)/intel64/LIBXSMM.mod $(INCDIR)
	@[ ! -e $(BLDDIR)/intel64/libxsmm.mod ] || $(CP) $(BLDDIR)/intel64/libxsmm.mod $(INCDIR)
	@[ ! -e LIBXSMM.mod ] || $(MV) LIBXSMM.mod $(INCDIR)
	@[ ! -e libxsmm.mod ] || $(MV) libxsmm.mod $(INCDIR)
	@touch $@
else
.PHONY: $(BLDDIR)/intel64/libxsmm-mod.o $(INCDIR)/libxsmm.mod
endif
864
# module: aggregates the host and the MIC Fortran module targets.
.PHONY: module
module: module_hst module_mic
867
# build_generator_lib: builds $(OUTDIR)/libxsmmgen.$(LIBEXT) from the objects
# in OBJFILES_GEN_LIB. With STATIC=0 the library is linked using LIB_LD and
# the solink/cleanld helpers (defined outside this section); otherwise an
# existing archive is removed and recreated by "$(AR) -rs".
# The ifeq/else/endif lines are evaluated when the makefile is parsed.
.PHONY: build_generator_lib
build_generator_lib: $(OUTDIR)/libxsmmgen.$(LIBEXT)
$(OUTDIR)/libxsmmgen.$(LIBEXT): $(OUTDIR)/.make $(OBJFILES_GEN_LIB) $(OUTDIR)/module
ifeq (0,$(STATIC))
	$(LIB_LD) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(OBJFILES_GEN_LIB) $(call cleanld,$(NOBLAS_LDFLAGS) $(NOBLAS_CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(OBJFILES_GEN_LIB)
endif
878
# generator: links the stand-alone $(BINDIR)/libxsmm_gemm_generator from
# OBJFILES_GEN_GEMM_BIN and the generator library (libxsmmgen.$(ILIBEXT),
# passed through the abslib helper which is defined outside this section).
.PHONY: generator
generator: $(BINDIR)/libxsmm_gemm_generator
$(BINDIR)/libxsmm_gemm_generator: $(BINDIR)/.make $(OBJFILES_GEN_GEMM_BIN) $(OUTDIR)/libxsmmgen.$(LIBEXT)
	$(LD) -o $@ $(OBJFILES_GEN_GEMM_BIN) $(call abslib,$(OUTDIR)/libxsmmgen.$(ILIBEXT)) \
		$(call cleanld,$(NOBLAS_LDFLAGS) $(NOBLAS_CLDFLAGS))
884
# Only if LIBJITPROFILING is set: copy libjitprofiling.$(SLIBEXT) from
# $(VTUNEROOT)/lib64 into $(BLDDIR)/jitprofiling and extract its members
# there ("ar x"). The extraction is chained with "&&" such that the archiver
# never runs in the wrong directory if changing the directory fails.
ifneq (,$(strip $(LIBJITPROFILING)))
$(LIBJITPROFILING): $(BLDDIR)/jitprofiling/.make
	@$(CP) $(VTUNEROOT)/lib64/libjitprofiling.$(SLIBEXT) $(BLDDIR)/jitprofiling
	@cd $(BLDDIR)/jitprofiling && $(AR) x libjitprofiling.$(SLIBEXT)
endif
890
# clib_mic: C library for the MIC target (rules exist only if MIC and MPSS
# are different from 0). Built from OBJFILES_MIC and KRNOBJS_MIC: linked with
# -mmic if STATIC=0, otherwise archived after removing an existing archive.
.PHONY: clib_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
clib_mic: $(OUTDIR)/mic/libxsmm.$(LIBEXT)
$(OUTDIR)/mic/libxsmm.$(LIBEXT): $(OUTDIR)/mic/.make $(OBJFILES_MIC) $(KRNOBJS_MIC)
ifeq (0,$(STATIC))
	$(LIB_LD) -mmic $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(OBJFILES_MIC) $(KRNOBJS_MIC) $(call cleanld,$(LDFLAGS) $(CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(OBJFILES_MIC) $(KRNOBJS_MIC)
endif
endif
endif
905
# clib_hst: C library for the host. The phony target depends on
# $(OUTDIR)/libxsmm.pc (its rule is not part of this section).
# The library consists of OBJFILES_HST, OBJFILES_GEN_LIB, and KRNOBJS_HST.
# NOTE(review): the shared link uses LIBJITPROFILING whereas the static
# archive uses OBJJITPROFILING (defined outside this section; presumably the
# objects extracted from LIBJITPROFILING) - confirm.
.PHONY: clib_hst
clib_hst: $(OUTDIR)/libxsmm.pc
$(OUTDIR)/libxsmm.$(LIBEXT): $(OUTDIR)/.make $(OBJFILES_HST) $(OBJFILES_GEN_LIB) $(KRNOBJS_HST) $(LIBJITPROFILING)
ifeq (0,$(STATIC))
	$(LIB_LD) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(OBJFILES_HST) $(OBJFILES_GEN_LIB) $(KRNOBJS_HST) $(LIBJITPROFILING) $(call cleanld,$(LDFLAGS) $(CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(OBJFILES_HST) $(OBJFILES_GEN_LIB) $(KRNOBJS_HST) $(OBJJITPROFILING)
endif
916
# flib_mic: Fortran library for the MIC target (requires MIC and MPSS to be
# different from 0). With a Fortran compiler (FC), the library is made of
# $(BLDDIR)/mic/libxsmm-mod.o; the shared variant additionally links against
# the MIC C library. Without FC, the library file is only declared phony.
.PHONY: flib_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
ifneq (,$(strip $(FC)))
flib_mic: $(OUTDIR)/mic/libxsmmf.$(LIBEXT)
$(OUTDIR)/mic/libxsmmf.$(LIBEXT): $(INCDIR)/mic/libxsmm.mod $(OUTDIR)/mic/libxsmm.$(LIBEXT)
ifeq (0,$(STATIC))
	$(LIB_FLD) -mmic $(FCMTFLAGS) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(BLDDIR)/mic/libxsmm-mod.o $(call abslib,$(OUTDIR)/mic/libxsmm.$(ILIBEXT)) $(call cleanld,$(LDFLAGS) $(FLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(BLDDIR)/mic/libxsmm-mod.o
endif
else
.PHONY: $(OUTDIR)/mic/libxsmmf.$(LIBEXT)
endif
endif
endif
935
# flib_hst: Fortran library for the host. With a Fortran compiler (FC), the
# phony target depends on $(OUTDIR)/libxsmmf.pc (rule not part of this
# section) and the library is made of $(BLDDIR)/intel64/libxsmm-mod.o; the
# shared variant additionally links against the host C library.
# Without FC, only $(OUTDIR)/libxsmmf.pc is declared phony.
.PHONY: flib_hst
ifneq (,$(strip $(FC)))
flib_hst: $(OUTDIR)/libxsmmf.pc
$(OUTDIR)/libxsmmf.$(LIBEXT): $(INCDIR)/libxsmm.mod $(OUTDIR)/libxsmm.$(LIBEXT)
ifeq (0,$(STATIC))
	$(LIB_FLD) $(FCMTFLAGS) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(BLDDIR)/intel64/libxsmm-mod.o $(call abslib,$(OUTDIR)/libxsmm.$(ILIBEXT)) $(call cleanld,$(LDFLAGS) $(FLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(BLDDIR)/intel64/libxsmm-mod.o
endif
else
.PHONY: $(OUTDIR)/libxsmmf.pc
endif
950
# ext_mic: extension library for the MIC target (requires MIC and MPSS to be
# different from 0). Made of EXTOBJS_MIC; the shared variant is linked with
# EXTLDFLAGS and against the MIC C library.
.PHONY: ext_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
ext_mic: $(OUTDIR)/mic/libxsmmext.$(LIBEXT)
$(OUTDIR)/mic/libxsmmext.$(LIBEXT): $(EXTOBJS_MIC) $(OUTDIR)/mic/libxsmm.$(LIBEXT)
ifeq (0,$(STATIC))
	$(LIB_LD) -mmic $(EXTLDFLAGS) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(EXTOBJS_MIC) $(call abslib,$(OUTDIR)/mic/libxsmm.$(ILIBEXT)) $(call cleanld,$(LDFLAGS) $(CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(EXTOBJS_MIC)
endif
endif
endif
965
# ext_hst: extension library for the host. The phony target depends on
# $(OUTDIR)/libxsmmext.pc (rule not part of this section). The library is
# made of EXTOBJS_HST; the shared variant is linked with EXTLDFLAGS and
# against the host C library.
.PHONY: ext_hst
ext_hst: $(OUTDIR)/libxsmmext.pc
$(OUTDIR)/libxsmmext.$(LIBEXT): $(OUTDIR)/libxsmm.$(LIBEXT) $(EXTOBJS_HST)
ifeq (0,$(STATIC))
	$(LIB_LD) $(EXTLDFLAGS) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(EXTOBJS_HST) $(call abslib,$(OUTDIR)/libxsmm.$(ILIBEXT)) $(call cleanld,$(LDFLAGS) $(CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(EXTOBJS_HST)
endif
976
# noblas_mic: "noblas" library for the MIC target (requires MIC and MPSS to
# be different from 0). Made of NOBLAS_MIC and linked with the NOBLAS_*
# linker flags (shared) or archived (static).
.PHONY: noblas_mic
ifneq (0,$(MIC))
ifneq (0,$(MPSS))
noblas_mic: $(OUTDIR)/mic/libxsmmnoblas.$(LIBEXT)
$(OUTDIR)/mic/libxsmmnoblas.$(LIBEXT): $(NOBLAS_MIC)
ifeq (0,$(STATIC))
	$(LIB_LD) -mmic $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(NOBLAS_MIC) $(call cleanld,$(NOBLAS_LDFLAGS) $(NOBLAS_CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(NOBLAS_MIC)
endif
endif
endif
991
# noblas_hst: "noblas" library for the host. The phony target depends on
# $(OUTDIR)/libxsmmnoblas.pc (rule not part of this section). The library is
# made of NOBLAS_HST and linked with the NOBLAS_* linker flags (shared) or
# archived (static).
.PHONY: noblas_hst
noblas_hst: $(OUTDIR)/libxsmmnoblas.pc
$(OUTDIR)/libxsmmnoblas.$(LIBEXT): $(NOBLAS_HST)
ifeq (0,$(STATIC))
	$(LIB_LD) $(call solink,$@,$(VERSION_MAJOR),$(VERSION_MINOR),$(VERSION_UPDATE),$(VERSION_API)) \
		$(NOBLAS_HST) $(call cleanld,$(NOBLAS_LDFLAGS) $(NOBLAS_CLDFLAGS))
else # static
	@rm -f $@
	$(AR) -rs $@ $(NOBLAS_HST)
endif
1002
# Directories of all sample Makefiles below $(ROOTDIR)/$(SPLDIR) except for
# the samples filtered out by the grep stages.
# use dir not qdir to avoid quotes; also $(ROOTDIR)/$(SPLDIR) is relative
# Simply expanded (:=) such that the find/grep pipeline runs once when the
# makefile is parsed rather than once per reference (the variable is
# referenced three times right below).
DIRS_SAMPLES := $(dir $(shell find $(ROOTDIR)/$(SPLDIR) -type f -name Makefile \
	| grep -v /deeplearning/tvm_cnnlayer/ \
	| grep -v /deeplearning/tf_lstm_ops/ \
	| grep -v /deeplearning/gxm/ \
	| grep -v /edge/repro/ \
	| grep -v /packed/ \
	| grep -v /pyfr/ \
	$(NULL)))

# samples: every sample directory is a phony target depending on lib_hst; the
# directory and the quoted sub-make command are passed to FLOCK (defined
# outside this section).
.PHONY: samples $(DIRS_SAMPLES)
samples: $(DIRS_SAMPLES)
$(DIRS_SAMPLES): lib_hst
	@$(FLOCK) $@ "$(MAKE) DEPSTATIC=$(STATIC)"
1017
# cp2k sample: requires the host library; the sample directory and the quoted
# sub-make command are passed to FLOCK (defined outside this section).
.PHONY: cp2k
cp2k: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/cp2k \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC)"

# MIC flavor: requires the MIC library and additionally passes KNC=1.
.PHONY: cp2k_mic
cp2k_mic: lib_mic
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/cp2k \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) KNC=1"
1023
# wrap sample (utilities/wrap): requires the host library; TRACE=0 is always
# passed to the sub-make.
.PHONY: wrap
wrap: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/utilities/wrap \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) TRACE=0"

# MIC flavor: requires the MIC library and additionally passes KNC=1.
.PHONY: wrap_mic
wrap_mic: lib_mic
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/utilities/wrap \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) KNC=1 TRACE=0"
1029
# nek sample: requires the host library; the sample directory and the quoted
# sub-make command are passed to FLOCK (defined outside this section).
.PHONY: nek
nek: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC)"

# MIC flavor: requires the MIC library and additionally passes KNC=1.
.PHONY: nek_mic
nek_mic: lib_mic
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) KNC=1"
1035
# smm sample: requires the host library; the sample directory and the quoted
# sub-make command are passed to FLOCK (defined outside this section).
.PHONY: smm
smm: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/smm \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC)"

# MIC flavor: requires the MIC library and additionally passes KNC=1.
.PHONY: smm_mic
smm_mic: lib_mic
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/smm \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) KNC=1"
1041
# specfem sample: requires the host library.
# The sample needs matching kernels, e.g.: make MNK="5 25" ...
.PHONY: specfem
specfem: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/specfem \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC)"

# MIC flavor: requires the MIC library and additionally passes KNC=1.
.PHONY: specfem_mic
specfem_mic: lib_mic
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/specfem \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) KNC=1"
1049
# drytest: only generates the performance/test driver scripts of the samples
# (nothing is executed).
.PHONY: drytest
drytest: \
	$(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.sh \
	$(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.sh \
	$(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.sh \
	$(ROOTDIR)/$(SPLDIR)/nek/grad-perf.sh \
	$(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.sh
1053
# Generates cp2k-perf.sh line by line: the first echo truncates the file, all
# further lines are appended, and the script is finally made executable.
# A "\$$" sequence yields a literal "$" in the generated script: make reduces
# "$$" to "$", and the backslash protects it from the recipe's shell.
# RUNS is taken from INDICES (if not empty) or set to a built-in default.
# Generated script: an optional first argument replaces FILE, an optional
# second argument sets SIZE (default: 0). For each M_N_K triplet of RUNS it
# runs cp2k-dbcsr.sh (CHECK=1) appending the output to FILE, prints OK or
# FAILED along with the captured stderr, and exits with 1 on the first failure.
$(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.sh: $(ROOTDIR)/$(SPLDIR)/cp2k/.make $(ROOTDIR)/Makefile
	@echo "#!/usr/bin/env sh" > $@
	@echo >> $@
	@echo "HERE=\$$(cd \$$(dirname \$$0); pwd -P)" >> $@
	@echo "FILE=cp2k-perf.txt" >> $@
ifneq (,$(strip $(INDICES)))
	@echo "RUNS=\"$(INDICES)\"" >> $@
else
	@echo "RUNS=\"23_23_23 4_6_9 13_5_7 24_3_36\"" >> $@
endif
	@echo >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  FILE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "fi" >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  SIZE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "else" >> $@
	@echo "  SIZE=0" >> $@
	@echo "fi" >> $@
	@echo "cat /dev/null > \$${FILE}" >> $@
	@echo >> $@
	@echo "NRUN=1" >> $@
	@echo "NMAX=\$$(echo \$${RUNS} | wc -w | tr -d ' ')" >> $@
	@echo "for RUN in \$${RUNS}; do" >> $@
	@echo "  MVALUE=\$$(echo \$${RUN} | cut -d_ -f1)" >> $@
	@echo "  NVALUE=\$$(echo \$${RUN} | cut -d_ -f2)" >> $@
	@echo "  KVALUE=\$$(echo \$${RUN} | cut -d_ -f3)" >> $@
	@echo "  >&2 echo -n \"\$${NRUN} of \$${NMAX} (M=\$${MVALUE} N=\$${NVALUE} K=\$${KVALUE})... \"" >> $@
	@echo "  ERROR=\$$({ CHECK=1 \$${HERE}/cp2k-dbcsr.sh \$${MVALUE} \$${SIZE} 0 \$${NVALUE} \$${KVALUE} >> \$${FILE}; } 2>&1)" >> $@
	@echo "  RESULT=\$$?" >> $@
	@echo "  if [ 0 != \$${RESULT} ]; then" >> $@
	@echo "    echo \"FAILED(\$${RESULT}) \$${ERROR}\"" >> $@
	@echo "    exit 1" >> $@
	@echo "  else" >> $@
	@echo "    echo \"OK \$${ERROR}\"" >> $@
	@echo "  fi" >> $@
	@echo "  echo >> \$${FILE}" >> $@
	@echo "  NRUN=\$$((NRUN+1))" >> $@
	@echo "done" >> $@
	@echo >> $@
	@chmod +x $@
1097
# Generates smmf-perf.sh line by line: the first echo truncates the file, all
# further lines are appended, and the script is finally made executable.
# A "\$$" sequence yields a literal "$" in the generated script.
# RUNS is taken from INDICES (if not empty) or set to a built-in default.
# Generated script: FILE defaults to smmf-perf.txt next to the script and is
# replaced by an optional first argument. For each M_N_K triplet of RUNS it
# runs smm.sh (CHECK=1) with M, N, K and the remaining arguments, appends the
# output to FILE, prints OK or FAILED along with the captured stderr, and
# exits with 1 on the first failure.
$(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.sh: $(ROOTDIR)/$(SPLDIR)/smm/.make $(ROOTDIR)/Makefile
	@echo "#!/usr/bin/env sh" > $@
	@echo >> $@
	@echo "HERE=\$$(cd \$$(dirname \$$0); pwd -P)" >> $@
	@echo "FILE=\$${HERE}/smmf-perf.txt" >> $@
ifneq (,$(strip $(INDICES)))
	@echo "RUNS=\"$(INDICES)\"" >> $@
else
	@echo "RUNS=\"23_23_23 4_6_9 13_5_7 24_3_36\"" >> $@
endif
	@echo >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  FILE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "fi" >> $@
	@echo "cat /dev/null > \$${FILE}" >> $@
	@echo >> $@
	@echo "NRUN=1" >> $@
	@echo "NMAX=\$$(echo \$${RUNS} | wc -w | tr -d ' ')" >> $@
	@echo "for RUN in \$${RUNS}; do" >> $@
	@echo "  MVALUE=\$$(echo \$${RUN} | cut -d_ -f1)" >> $@
	@echo "  NVALUE=\$$(echo \$${RUN} | cut -d_ -f2)" >> $@
	@echo "  KVALUE=\$$(echo \$${RUN} | cut -d_ -f3)" >> $@
	@echo "  >&2 echo -n \"\$${NRUN} of \$${NMAX} (M=\$${MVALUE} N=\$${NVALUE} K=\$${KVALUE})... \"" >> $@
	@echo "  ERROR=\$$({ CHECK=1 \$${HERE}/smm.sh \$${MVALUE} \$${NVALUE} \$${KVALUE} \$$* >> \$${FILE}; } 2>&1)" >> $@
	@echo "  RESULT=\$$?" >> $@
	@echo "  if [ 0 != \$${RESULT} ]; then" >> $@
	@echo "    echo \"FAILED(\$${RESULT}) \$${ERROR}\"" >> $@
	@echo "    exit 1" >> $@
	@echo "  else" >> $@
	@echo "    echo \"OK \$${ERROR}\"" >> $@
	@echo "  fi" >> $@
	@echo "  echo >> \$${FILE}" >> $@
	@echo "  NRUN=\$$((NRUN+1))" >> $@
	@echo "done" >> $@
	@echo >> $@
	@chmod +x $@
1135
# Generates axhm-perf.sh line by line: the first echo truncates the file, all
# further lines are appended, and the script is finally made executable.
# A "\$$" sequence yields a literal "$" in the generated script.
# RUNS is taken from INDICES (if not empty) or set to a built-in default.
# Generated script: FILE defaults to axhm-perf.txt next to the script and is
# replaced by an optional first argument. For each M_N_K triplet of RUNS it
# runs axhm.sh (CHECK=1) with M, N, K and the remaining arguments, appends the
# output to FILE, prints OK or FAILED along with the captured stderr, and
# exits with 1 on the first failure.
$(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.sh: $(ROOTDIR)/$(SPLDIR)/nek/.make $(ROOTDIR)/Makefile
	@echo "#!/usr/bin/env sh" > $@
	@echo >> $@
	@echo "HERE=\$$(cd \$$(dirname \$$0); pwd -P)" >> $@
	@echo "FILE=\$${HERE}/axhm-perf.txt" >> $@
ifneq (,$(strip $(INDICES)))
	@echo "RUNS=\"$(INDICES)\"" >> $@
else
	@echo "RUNS=\"4_6_9 8_8_8 13_13_13 16_8_13\"" >> $@
endif
	@echo >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  FILE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "fi" >> $@
	@echo "cat /dev/null > \$${FILE}" >> $@
	@echo >> $@
	@echo "NRUN=1" >> $@
	@echo "NMAX=\$$(echo \$${RUNS} | wc -w | tr -d ' ')" >> $@
	@echo "for RUN in \$${RUNS}; do" >> $@
	@echo "  MVALUE=\$$(echo \$${RUN} | cut -d_ -f1)" >> $@
	@echo "  NVALUE=\$$(echo \$${RUN} | cut -d_ -f2)" >> $@
	@echo "  KVALUE=\$$(echo \$${RUN} | cut -d_ -f3)" >> $@
	@echo "  >&2 echo -n \"\$${NRUN} of \$${NMAX} (M=\$${MVALUE} N=\$${NVALUE} K=\$${KVALUE})... \"" >> $@
	@echo "  ERROR=\$$({ CHECK=1 \$${HERE}/axhm.sh \$${MVALUE} \$${NVALUE} \$${KVALUE} \$$* >> \$${FILE}; } 2>&1)" >> $@
	@echo "  RESULT=\$$?" >> $@
	@echo "  if [ 0 != \$${RESULT} ]; then" >> $@
	@echo "    echo \"FAILED(\$${RESULT}) \$${ERROR}\"" >> $@
	@echo "    exit 1" >> $@
	@echo "  else" >> $@
	@echo "    echo \"OK \$${ERROR}\"" >> $@
	@echo "  fi" >> $@
	@echo "  echo >> \$${FILE}" >> $@
	@echo "  NRUN=\$$((NRUN+1))" >> $@
	@echo "done" >> $@
	@echo >> $@
	@chmod +x $@
1173
# Generates grad-perf.sh line by line: the first echo truncates the file, all
# further lines are appended, and the script is finally made executable.
# A "\$$" sequence yields a literal "$" in the generated script.
# RUNS is taken from INDICES (if not empty) or set to a built-in default.
# Generated script: FILE defaults to grad-perf.txt next to the script and is
# replaced by an optional first argument. For each M_N_K triplet of RUNS it
# runs grad.sh (CHECK=1) with M, N, K and the remaining arguments, appends the
# output to FILE, prints OK or FAILED along with the captured stderr, and
# exits with 1 on the first failure.
$(ROOTDIR)/$(SPLDIR)/nek/grad-perf.sh: $(ROOTDIR)/$(SPLDIR)/nek/.make $(ROOTDIR)/Makefile
	@echo "#!/usr/bin/env sh" > $@
	@echo >> $@
	@echo "HERE=\$$(cd \$$(dirname \$$0); pwd -P)" >> $@
	@echo "FILE=\$${HERE}/grad-perf.txt" >> $@
ifneq (,$(strip $(INDICES)))
	@echo "RUNS=\"$(INDICES)\"" >> $@
else
	@echo "RUNS=\"4_6_9 8_8_8 13_13_13 16_8_13\"" >> $@
endif
	@echo >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  FILE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "fi" >> $@
	@echo "cat /dev/null > \$${FILE}" >> $@
	@echo >> $@
	@echo "NRUN=1" >> $@
	@echo "NMAX=\$$(echo \$${RUNS} | wc -w | tr -d ' ')" >> $@
	@echo "for RUN in \$${RUNS}; do" >> $@
	@echo "  MVALUE=\$$(echo \$${RUN} | cut -d_ -f1)" >> $@
	@echo "  NVALUE=\$$(echo \$${RUN} | cut -d_ -f2)" >> $@
	@echo "  KVALUE=\$$(echo \$${RUN} | cut -d_ -f3)" >> $@
	@echo "  >&2 echo -n \"\$${NRUN} of \$${NMAX} (M=\$${MVALUE} N=\$${NVALUE} K=\$${KVALUE})... \"" >> $@
	@echo "  ERROR=\$$({ CHECK=1 \$${HERE}/grad.sh \$${MVALUE} \$${NVALUE} \$${KVALUE} \$$* >> \$${FILE}; } 2>&1)" >> $@
	@echo "  RESULT=\$$?" >> $@
	@echo "  if [ 0 != \$${RESULT} ]; then" >> $@
	@echo "    echo \"FAILED(\$${RESULT}) \$${ERROR}\"" >> $@
	@echo "    exit 1" >> $@
	@echo "  else" >> $@
	@echo "    echo \"OK \$${ERROR}\"" >> $@
	@echo "  fi" >> $@
	@echo "  echo >> \$${FILE}" >> $@
	@echo "  NRUN=\$$((NRUN+1))" >> $@
	@echo "done" >> $@
	@echo >> $@
	@chmod +x $@
1211
# Generates rstr-perf.sh line by line: the first echo truncates the file, all
# further lines are appended, and the script is finally made executable.
# A "\$$" sequence yields a literal "$" in the generated script.
# If INDICES is not empty, both RUNS and RUNT are set to it; otherwise each
# list receives its own built-in default.
# Generated script: FILE defaults to rstr-perf.txt next to the script and is
# replaced by an optional first argument. It iterates over all combinations
# of RUNS (outer loop) and RUNT (inner loop), i.e., NMAX is the product of
# both list lengths. Each combination runs rstr.sh (CHECK=1) with both
# triplets and the remaining arguments, appends the output to FILE, prints OK
# or FAILED along with the captured stderr, and exits with 1 on the first
# failure.
$(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.sh: $(ROOTDIR)/$(SPLDIR)/nek/.make $(ROOTDIR)/Makefile
	@echo "#!/usr/bin/env sh" > $@
	@echo >> $@
	@echo "HERE=\$$(cd \$$(dirname \$$0); pwd -P)" >> $@
	@echo "FILE=\$${HERE}/rstr-perf.txt" >> $@
ifneq (,$(strip $(INDICES)))
	@echo "RUNS=\"$(INDICES)\"" >> $@
	@echo "RUNT=\"$(INDICES)\"" >> $@
else
	@echo "RUNS=\"4_4_4 8_8_8\"" >> $@
	@echo "RUNT=\"7_7_7 10_10_10\"" >> $@
endif
	@echo >> $@
	@echo "if [ \"\" != \"\$$1\" ]; then" >> $@
	@echo "  FILE=\$$1" >> $@
	@echo "  shift" >> $@
	@echo "fi" >> $@
	@echo "cat /dev/null > \$${FILE}" >> $@
	@echo >> $@
	@echo "NRUN=1" >> $@
	@echo "NRUNS=\$$(echo \$${RUNS} | wc -w | tr -d ' ')" >> $@
	@echo "NRUNT=\$$(echo \$${RUNT} | wc -w | tr -d ' ')" >> $@
	@echo "NMAX=\$$((NRUNS*NRUNT))" >> $@
	@echo "for RUN1 in \$${RUNS}; do" >> $@
	@echo "  for RUN2 in \$${RUNT}; do" >> $@
	@echo "  MVALUE=\$$(echo \$${RUN1} | cut -d_ -f1)" >> $@
	@echo "  NVALUE=\$$(echo \$${RUN1} | cut -d_ -f2)" >> $@
	@echo "  KVALUE=\$$(echo \$${RUN1} | cut -d_ -f3)" >> $@
	@echo "  MMVALUE=\$$(echo \$${RUN2} | cut -d_ -f1)" >> $@
	@echo "  NNVALUE=\$$(echo \$${RUN2} | cut -d_ -f2)" >> $@
	@echo "  KKVALUE=\$$(echo \$${RUN2} | cut -d_ -f3)" >> $@
	@echo "  >&2 echo -n \"\$${NRUN} of \$${NMAX} (MNK=\$${MVALUE}x\$${NVALUE}x\$${KVALUE} MNK2=\$${MMVALUE}x\$${NNVALUE}x\$${KKVALUE})... \"" >> $@
	@echo "  ERROR=\$$({ CHECK=1 \$${HERE}/rstr.sh \$${MVALUE} \$${NVALUE} \$${KVALUE} \$${MMVALUE} \$${NNVALUE} \$${KKVALUE} \$$* >> \$${FILE}; } 2>&1)" >> $@
	@echo "  RESULT=\$$?" >> $@
	@echo "  if [ 0 != \$${RESULT} ]; then" >> $@
	@echo "    echo \"FAILED(\$${RESULT}) \$${ERROR}\"" >> $@
	@echo "    exit 1" >> $@
	@echo "  else" >> $@
	@echo "    echo \"OK \$${ERROR}\"" >> $@
	@echo "  fi" >> $@
	@echo "  echo >> \$${FILE}" >> $@
	@echo "  NRUN=\$$((NRUN+1))" >> $@
	@echo "done" >> $@
	@echo "done" >> $@
	@echo >> $@
	@chmod +x $@
1258
# Convenience aliases: "test" maps to "tests", "perf" to "perf-cp2k", and
# "test-all" runs the unit tests and all sample-based tests.
.PHONY: test perf test-all
test: tests
perf: perf-cp2k
test-all: tests test-cp2k test-smm test-nek test-wrap
1267
# build-tests: invokes the default goal of the tests' Makefile after the host
# library; the directory and the quoted sub-make command are passed to FLOCK
# (defined outside this section).
.PHONY: build-tests
build-tests: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(TSTDIR) \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC)"

# tests: like build-tests but requests the "test" goal of the sub-make.
.PHONY: tests
tests: lib_hst
	@$(FLOCK) $(ROOTDIR)/$(TSTDIR) \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) test"
1275
# cpp-test is an alias of test-cpp.
.PHONY: cpp-test
cpp-test: test-cpp

# test-cpp: requests the goals "clean" and "compile" from the cp2k sample with
# -DUSE_HEADER_ONLY prepended to ECXXFLAGS (and TRACE=0); it only depends on
# the generated libxsmm_source.h rather than on the library.
.PHONY: test-cpp
test-cpp: $(INCDIR)/libxsmm_source.h
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/cp2k "$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) TRACE=0 \
		ECXXFLAGS='-DUSE_HEADER_ONLY $(ECXXFLAGS)' clean compile"
1283
# test-cp2k: runs the generated cp2k-perf.sh with the name of the result file
# and a size of TESTSIZE * 128 (computed by the shell when the recipe is
# expanded). The phony prerequisites (lib_hst, cp2k) cause the recipe to run
# on every request. qdir/qndir are defined outside this section (presumably
# the quoted directory and file name of the argument).
.PHONY: test-cp2k
test-cp2k: $(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-test.txt
$(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-test.txt: $(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.sh lib_hst cp2k
	@$(FLOCK) $(call qdir,$@) "./cp2k-perf.sh $(call qndir,$@) $(shell echo $$(($(TESTSIZE) * 128)))"

# perf-cp2k: runs cp2k-perf.sh with only the name of the result file, i.e.,
# the script applies its default size.
.PHONY: perf-cp2k
perf-cp2k: $(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.txt
$(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.txt: $(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.sh lib_hst cp2k
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/cp2k "./cp2k-perf.sh $(call qndir,$@)"
1293
# test-wrap: builds the wrap sample first (prerequisite) and then requests
# the "test" goal from the sample's Makefile (TRACE=0).
.PHONY: test-wrap
test-wrap: wrap
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/utilities/wrap \
		"$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) TRACE=0 test"
1297
# test-smm: only has a rule if a Fortran compiler (FC) is set. Runs the
# generated smmf-perf.sh with the name of the result file and the value
# TESTSIZE * -128 (computed by the shell when the recipe is expanded).
.PHONY: test-smm
ifneq (,$(strip $(FC)))
test-smm: $(ROOTDIR)/$(SPLDIR)/smm/smm-test.txt
$(ROOTDIR)/$(SPLDIR)/smm/smm-test.txt: $(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.sh lib_hst smm
	@$(FLOCK) $(call qdir,$@) "./smmf-perf.sh $(call qndir,$@) $(shell echo $$(($(TESTSIZE) * -128)))"
endif

# perf-smm: only has a rule if a Fortran compiler (FC) is set. Runs
# smmf-perf.sh with only the name of the result file.
.PHONY: perf-smm
ifneq (,$(strip $(FC)))
perf-smm: $(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.txt
$(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.txt: $(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.sh lib_hst smm
	@$(FLOCK) $(call qdir,$@) "./smmf-perf.sh $(call qndir,$@)"
endif
1311
# test-nek: only has rules if a Fortran compiler (FC) is set. Each result file
# (axhm, grad, rstr) is produced in two steps: the sub-make builds the
# respective goal of the nek sample, then the generated *-perf.sh script is
# run with the name of the result file and the value TESTSIZE * -128.
# The phony prerequisite lib_hst causes the recipes to run on every request.
.PHONY: test-nek
ifneq (,$(strip $(FC)))
test-nek: \
	$(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.txt \
	$(ROOTDIR)/$(SPLDIR)/nek/grad-perf.txt \
	$(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.txt
$(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.txt: $(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.sh lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) axhm"
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "./axhm-perf.sh $(call qndir,$@) $(shell echo $$(($(TESTSIZE) * -128)))"
$(ROOTDIR)/$(SPLDIR)/nek/grad-perf.txt: $(ROOTDIR)/$(SPLDIR)/nek/grad-perf.sh lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) grad"
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "./grad-perf.sh $(call qndir,$@) $(shell echo $$(($(TESTSIZE) * -128)))"
$(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.txt: $(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.sh lib_hst
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "$(MAKE) --no-print-directory DEPSTATIC=$(STATIC) rstr"
	@$(FLOCK) $(ROOTDIR)/$(SPLDIR)/nek "./rstr-perf.sh $(call qndir,$@) $(shell echo $$(($(TESTSIZE) * -128)))"
endif
1328
# Derives the documentation's index.md from README.md:
# - removes linked images "[![..](..)](..)" and "[[..](..)]" constructs,
# - strips the "$(DOCDIR)/" prefix from link destinations,
# - deletes pairs of empty lines (N reads the next line into the pattern space).
# The input file is given as the last argument (all options precede the
# operand as required by POSIX); passing the file prior to the -e options
# only works with implementations that permute their arguments.
$(DOCDIR)/index.md: $(DOCDIR)/.make $(ROOTDIR)/Makefile $(ROOTDIR)/README.md
	@sed \
		-e 's/\[!\[..*\](..*)\](..*)//g' \
		-e 's/\[\[..*\](..*)\]//g' \
		-e "s/](${DOCDIR}\//](/g" \
		-e 'N;/^\n$$/d;P;D' \
		$(ROOTDIR)/README.md \
		> $@
1336
# Main documentation, rendered by pandoc:
# 1. TMPFILE is set (via eval, when the recipe is expanded) to a temporary
#    file created by MKTEMP inside the documentation directory.
# 2. pandoc's default LaTeX template (pandoc -D latex) is patched by sed and
#    stored in TMPFILE.
# 3. Inside the documentation directory, the Markdown files are concatenated
#    (converted to UTF-8), two wiki pages are downloaded by wget, HTML sub/sup
#    tags are translated to "~"/"^", runs of at least three dashes are
#    removed, and the result is piped into pandoc using the patched template.
# 4. TMPFILE is removed (not reached if an earlier recipe line fails).
# qndir is defined outside this section (presumably yields the file name
# without directory, which fits the preceding change of the directory).
$(DOCDIR)/libxsmm.$(DOCEXT): $(DOCDIR)/.make $(ROOTDIR)/documentation/index.md \
$(ROOTDIR)/documentation/libxsmm_mm.md $(ROOTDIR)/documentation/libxsmm_dl.md $(ROOTDIR)/documentation/libxsmm_aux.md \
$(ROOTDIR)/documentation/libxsmm_prof.md $(ROOTDIR)/documentation/libxsmm_tune.md $(ROOTDIR)/documentation/libxsmm_be.md
	$(eval TMPFILE = $(shell $(MKTEMP) $(ROOTDIR)/documentation/.libxsmm_XXXXXX.tex))
	@pandoc -D latex \
	| sed \
		-e 's/\(\\documentclass\[..*\]{..*}\)/\1\n\\pagenumbering{gobble}\n\\RedeclareSectionCommands[beforeskip=-1pt,afterskip=1pt]{subsection,subsubsection}/' \
		-e 's/\\usepackage{listings}/\\usepackage{listings}\\lstset{basicstyle=\\footnotesize\\ttfamily}/' \
		-e 's/\(\\usepackage.*{hyperref}\)/\\usepackage[hyphens]{url}\n\1/' \
		> $(TMPFILE)
	@cd $(ROOTDIR)/documentation && ( \
		iconv -t utf-8 index.md && echo && \
		echo "# LIBXSMM Domains" && \
		iconv -t utf-8 libxsmm_mm.md && echo && \
		iconv -t utf-8 libxsmm_dl.md && echo && \
		iconv -t utf-8 libxsmm_aux.md && echo && \
		iconv -t utf-8 libxsmm_prof.md && echo && \
		iconv -t utf-8 libxsmm_tune.md && echo && \
		iconv -t utf-8 libxsmm_be.md && echo && \
		echo "# Appendix" && \
		echo "## Compatibility" && \
		wget -T $(TIMEOUT) -q -O - https://raw.githubusercontent.com/wiki/hfp/libxsmm/Compatibility.md 2>/dev/null && echo && \
		echo "## Validation" && \
		wget -T $(TIMEOUT) -q -O - https://raw.githubusercontent.com/wiki/hfp/libxsmm/Validation.md 2>/dev/null; ) \
	| sed \
		-e 's/<sub>/~/g' -e 's/<\/sub>/~/g' \
		-e 's/<sup>/^/g' -e 's/<\/sup>/^/g' \
		-e 's/----*//g' \
	| pandoc \
		--template=$(call qndir,$(TMPFILE)) --listings \
		-f markdown_github+all_symbols_escapable+subscript+superscript \
		-V documentclass=scrartcl \
		-V title-meta="LIBXSMM Documentation" \
		-V author-meta="Hans Pabst, Alexander Heinecke" \
		-V classoption=DIV=45 \
		-V linkcolor=black \
		-V citecolor=black \
		-V urlcolor=black \
		-o $(call qndir,$@)
	@rm $(TMPFILE)
1377
# Summary of all sample READMEs: concatenates the README.md files of the
# samples (first level, deeplearning, and utilities), demotes every heading
# by one level, translates HTML sub/sup tags to "~"/"^", removes runs of at
# least three dashes, and prepends a title line linking to the PDF.
# The wildcards in the prerequisite list are expanded by make.
$(DOCDIR)/libxsmm_samples.md: $(ROOTDIR)/Makefile $(ROOTDIR)/$(SPLDIR)/*/README.md $(ROOTDIR)/$(SPLDIR)/deeplearning/*/README.md $(ROOTDIR)/$(SPLDIR)/utilities/*/README.md
	@cat $(ROOTDIR)/$(SPLDIR)/*/README.md $(ROOTDIR)/$(SPLDIR)/deeplearning/*/README.md $(ROOTDIR)/$(SPLDIR)/utilities/*/README.md \
	| sed \
		-e 's/^#/##/' \
		-e 's/<sub>/~/g' -e 's/<\/sub>/~/g' \
		-e 's/<sup>/^/g' -e 's/<\/sup>/^/g' \
		-e 's/----*//g' \
		-e '1s/^/# [LIBXSMM Samples](https:\/\/github.com\/hfp\/libxsmm\/raw\/master\/documentation\/libxsmm_samples.pdf)\n\n/' \
		> $@
1387
# Sample code summary, rendered by pandoc from libxsmm_samples.md.
# TMPFILE is set (via eval, when the recipe is expanded) to a temporary file
# created by MKTEMP in the current working directory; it receives pandoc's
# default LaTeX template patched by sed and is removed by the last recipe
# line (not reached if an earlier recipe line fails).
# Unlike the other documents, pandoc runs without changing the directory;
# hence TMPFILE and the target are used as they are.
$(DOCDIR)/libxsmm_samples.$(DOCEXT): $(ROOTDIR)/documentation/libxsmm_samples.md
	$(eval TMPFILE = $(shell $(MKTEMP) .libxsmm_XXXXXX.tex))
	@pandoc -D latex \
	| sed \
		-e 's/\(\\documentclass\[..*\]{..*}\)/\1\n\\pagenumbering{gobble}\n\\RedeclareSectionCommands[beforeskip=-1pt,afterskip=1pt]{subsection,subsubsection}/' \
		-e 's/\\usepackage{listings}/\\usepackage{listings}\\lstset{basicstyle=\\footnotesize\\ttfamily}/' \
		-e 's/\(\\usepackage.*{hyperref}\)/\\usepackage[hyphens]{url}\n\1/' \
		> $(TMPFILE)
	@iconv -t utf-8 $(ROOTDIR)/documentation/libxsmm_samples.md \
	| pandoc \
		--template=$(TMPFILE) --listings \
		-f markdown_github+all_symbols_escapable+subscript+superscript \
		-V documentclass=scrartcl \
		-V title-meta="LIBXSMM Sample Code Summary" \
		-V classoption=DIV=45 \
		-V linkcolor=black \
		-V citecolor=black \
		-V urlcolor=black \
		-o $@
	@rm $(TMPFILE)
1408
# TensorFlow document, rendered by pandoc from tensorflow.md.
# TMPFILE is set (via eval, when the recipe is expanded) to a temporary file
# created by MKTEMP inside the documentation directory; it receives pandoc's
# default LaTeX template patched by sed and is removed by the last recipe
# line (not reached if an earlier recipe line fails).
# The conversion runs inside the documentation directory: HTML sub/sup tags
# are translated to "~"/"^" and runs of at least three dashes are removed.
# qndir is defined outside this section (presumably yields the file name
# without directory).
$(DOCDIR)/tensorflow.$(DOCEXT): $(DOCDIR)/.make $(ROOTDIR)/Makefile $(ROOTDIR)/documentation/tensorflow.md
	$(eval TMPFILE = $(shell $(MKTEMP) $(ROOTDIR)/documentation/.libxsmm_XXXXXX.tex))
	@pandoc -D latex \
	| sed \
		-e 's/\(\\documentclass\[..*\]{..*}\)/\1\n\\pagenumbering{gobble}\n\\RedeclareSectionCommands[beforeskip=-1pt,afterskip=1pt]{subsection,subsubsection}/' \
		-e 's/\\usepackage{listings}/\\usepackage{listings}\\lstset{basicstyle=\\footnotesize\\ttfamily}/' \
		-e 's/\(\\usepackage.*{hyperref}\)/\\usepackage[hyphens]{url}\n\1/' \
		> $(TMPFILE)
	@cd $(ROOTDIR)/documentation && iconv -t utf-8 tensorflow.md \
	| sed \
		-e 's/<sub>/~/g' -e 's/<\/sub>/~/g' \
		-e 's/<sup>/^/g' -e 's/<\/sup>/^/g' \
		-e 's/----*//g' \
	| pandoc \
		--template=$(call qndir,$(TMPFILE)) --listings \
		-f markdown_github+all_symbols_escapable+subscript+superscript \
		-V documentclass=scrartcl \
		-V title-meta="TensorFlow with LIBXSMM" \
		-V author-meta="Hans Pabst" \
		-V classoption=DIV=45 \
		-V linkcolor=black \
		-V citecolor=black \
		-V urlcolor=black \
		-o $(call qndir,$@)
	@rm $(TMPFILE)
1434
# documentation: renders all documents (main documentation, sample summary,
# and the TensorFlow document).
# mkdocs: requires the generated Markdown files; builds the site from scratch
# and then serves it.
.PHONY: documentation mkdocs
documentation: $(DOCDIR)/libxsmm.$(DOCEXT) $(DOCDIR)/libxsmm_samples.$(DOCEXT) $(DOCDIR)/tensorflow.$(DOCEXT)
mkdocs: $(ROOTDIR)/documentation/index.md $(ROOTDIR)/documentation/libxsmm_samples.md
	@mkdocs build --clean
	@mkdocs serve
1445
# clean: removes intermediate build files.
# - BLDDIR is removed recursively unless the result of qapath (defined outside
#   this section) equals ROOTDIR or HEREDIR.
#   NOTE(review): ROOTDIR is deliberately not an absolute path (see top of
#   file); confirm that the comparison with ROOTDIR can match at all.
# - All conditionals are evaluated when the makefile is parsed, i.e., the
#   wildcard check reflects the state prior to running any recipe line: the
#   per-file removals are part of the recipe whenever BLDDIR existed at parse
#   time (they are harmless after the recursive removal).
# - The .make and .state files are removed below the current working
#   directory, followed by Python's byte-code leftovers of the scripts.
.PHONY: clean
clean:
ifneq ($(call qapath,$(BLDDIR)),$(ROOTDIR))
ifneq ($(call qapath,$(BLDDIR)),$(HEREDIR))
	@rm -rf $(BLDDIR)
endif
endif
ifneq (,$(wildcard $(BLDDIR))) # still exists
	@rm -f $(OBJECTS) $(FTNOBJS) $(SRCFILES_KERNELS) $(BLDDIR)/libxsmm_dispatch.h
	@rm -f $(BLDDIR)/*.gcno $(BLDDIR)/*.gcda $(BLDDIR)/*.gcov
endif
	@find . -type f \( -name .make -or -name .state \) -exec rm {} \;
	@rm -f $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.pyc
	@rm -rf $(ROOTDIR)/$(SCRDIR)/__pycache__
1460
# realclean: runs "clean" first and additionally removes the build results.
# - OUTDIR and BINDIR are removed recursively unless the result of qapath
#   (defined outside this section) equals ROOTDIR or HEREDIR.
# - The conditionals are evaluated when the makefile is parsed: the per-file
#   removals (libraries, pkg-config files, generators) are part of the recipe
#   whenever the respective directory existed at parse time.
# - Finally removes the generated sample scripts and the generated interface
#   files (version header, Fortran interface, and module files) from INCDIR.
.PHONY: realclean
realclean: clean
ifneq ($(call qapath,$(OUTDIR)),$(ROOTDIR))
ifneq ($(call qapath,$(OUTDIR)),$(HEREDIR))
	@rm -rf $(OUTDIR)
endif
endif
ifneq (,$(wildcard $(OUTDIR))) # still exists
	@rm -f $(OUTDIR)/libxsmm.$(LIBEXT)* $(OUTDIR)/mic/libxsmm.$(LIBEXT)*
	@rm -f $(OUTDIR)/libxsmmf.$(LIBEXT)* $(OUTDIR)/mic/libxsmmf.$(LIBEXT)*
	@rm -f $(OUTDIR)/libxsmmext.$(LIBEXT)* $(OUTDIR)/mic/libxsmmext.$(LIBEXT)*
	@rm -f $(OUTDIR)/libxsmmnoblas.$(LIBEXT)* $(OUTDIR)/mic/libxsmmnoblas.$(LIBEXT)*
	@rm -f $(OUTDIR)/libxsmmgen.$(LIBEXT)*
	@rm -f $(OUTDIR)/libxsmm*.pc
endif
ifneq ($(call qapath,$(BINDIR)),$(ROOTDIR))
ifneq ($(call qapath,$(BINDIR)),$(HEREDIR))
	@rm -rf $(BINDIR)
endif
endif
ifneq (,$(wildcard $(BINDIR))) # still exists
	@rm -f $(BINDIR)/libxsmm_*_generator
endif
	@rm -f $(ROOTDIR)/$(SPLDIR)/cp2k/cp2k-perf.sh
	@rm -f $(ROOTDIR)/$(SPLDIR)/smm/smmf-perf.sh
	@rm -f $(ROOTDIR)/$(SPLDIR)/nek/grad-perf.sh
	@rm -f $(ROOTDIR)/$(SPLDIR)/nek/axhm-perf.sh
	@rm -f $(ROOTDIR)/$(SPLDIR)/nek/rstr-perf.sh
	@rm -f $(INCDIR)/libxsmm_version.h
	@rm -f $(INCDIR)/libxsmm.modmic
	@rm -f $(INCDIR)/libxsmm.mod
	@rm -f $(INCDIR)/libxsmm.f
1493
# clean-all: runs "clean" and then requests the "clean" goal for every
# Makefile found below ROOTDIR (each path and the quoted sub-make command are
# passed to FLOCK, which is defined outside this section). Errors are
# silenced and ignored (best effort).
.PHONY: clean-all
clean-all: clean
	@find $(ROOTDIR) -type f -name Makefile -exec $(FLOCK) {} \
		"$(MAKE) --no-print-directory clean" \; 2>/dev/null || true

# realclean-all: like clean-all but based on "realclean".
.PHONY: realclean-all
realclean-all: realclean
	@find $(ROOTDIR) -type f -name Makefile -exec $(FLOCK) {} \
		"$(MAKE) --no-print-directory realclean" \; 2>/dev/null || true

# distclean: realclean-all, and removes everything matching "libxsmm*" in the
# current working directory (recursively).
.PHONY: distclean
distclean: realclean-all
	@rm -rf libxsmm*
1507
# keep the original prefix (simply expanded, i.e., a snapshot of PREFIX
# taken before PREFIX is adjusted below)
ALIAS_PREFIX := $(PREFIX)

# DESTDIR is used as prefix of PREFIX; the combined path is passed through
# qapath (defined outside this section). "override" also replaces a PREFIX
# given on the command line.
ifneq (,$(strip $(DESTDIR)))
  override PREFIX := $(call qapath,$(DESTDIR)/$(PREFIX))
endif
# fall-back: an empty PREFIX becomes HEREDIR
ifeq (,$(strip $(PREFIX)))
  override PREFIX := $(HEREDIR)
endif

# setup maintainer-layout: if the original prefix was empty, it follows the
# final PREFIX. If both differ (which requires DESTDIR and a non-empty
# original PREFIX), the pkg-config and module files use dedicated
# subdirectories instead of the defaults assigned (?=) at the top of the file.
ifeq (,$(strip $(ALIAS_PREFIX)))
  override ALIAS_PREFIX := $(PREFIX)
endif
ifneq ($(ALIAS_PREFIX),$(PREFIX))
  PPKGDIR = libdata/pkgconfig
  PMODDIR = share/modules
endif
1528
# install-minimal: installs the libraries, pkg-config/module files, the
# stand-alone generators, the interface (headers, Fortran interface, module
# files), and the sources (header-only use) below PREFIX using the
# subdirectories POUTDIR, PPKGDIR, PMODDIR, PBINDIR, PINCDIR, and PSRCDIR.
# The recipe is empty if PREFIX equals ABSDIR (in-place installation).
# Host-side copies are best effort (errors are silenced and ignored) since not
# every library flavor exists; MIC libraries are only copied if present.
# The module file is only copied if "$(PREFIX)/$(PMODDIR)/module" does not
# exist. The copy command inside of this shell conditional must not carry
# make's "@" prefix: the prefix is only recognized at the begin of a recipe
# line, and elsewhere the shell attempts to run a command literally named
# "@..." (which fails and was hidden by "|| true").
.PHONY: install-minimal
install-minimal: libxsmm
ifneq ($(PREFIX),$(ABSDIR))
	@mkdir -p $(PREFIX)/$(POUTDIR) $(PREFIX)/$(PBINDIR) $(PREFIX)/$(PINCDIR) $(PREFIX)/$(PSRCDIR)
	@echo
	@echo "LIBXSMM installing libraries..."
	@$(CP) -va $(OUTDIR)/libxsmmnoblas.$(DLIBEXT)* $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -v  $(OUTDIR)/libxsmmnoblas.$(SLIBEXT)  $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -va $(OUTDIR)/libxsmmgen.$(DLIBEXT)* $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -v  $(OUTDIR)/libxsmmgen.$(SLIBEXT)  $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -va $(OUTDIR)/libxsmmext.$(DLIBEXT)* $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -v  $(OUTDIR)/libxsmmext.$(SLIBEXT)  $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -va $(OUTDIR)/libxsmmf.$(DLIBEXT)* $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -v  $(OUTDIR)/libxsmmf.$(SLIBEXT)  $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -va $(OUTDIR)/libxsmm.$(DLIBEXT)* $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@$(CP) -v  $(OUTDIR)/libxsmm.$(SLIBEXT)  $(PREFIX)/$(POUTDIR) 2>/dev/null || true
	@if [ -e $(OUTDIR)/mic/libxsmmnoblas.$(DLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -va $(OUTDIR)/mic/libxsmmnoblas.$(DLIBEXT)* $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmmnoblas.$(SLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -v $(OUTDIR)/mic/libxsmmnoblas.$(SLIBEXT) $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmmext.$(DLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -va $(OUTDIR)/mic/libxsmmext.$(DLIBEXT)* $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmmext.$(SLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -v $(OUTDIR)/mic/libxsmmext.$(SLIBEXT) $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmmf.$(DLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -va $(OUTDIR)/mic/libxsmmf.$(DLIBEXT)* $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmmf.$(SLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -v $(OUTDIR)/mic/libxsmmf.$(SLIBEXT) $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmm.$(DLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -va $(OUTDIR)/mic/libxsmm.$(DLIBEXT)* $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@if [ -e $(OUTDIR)/mic/libxsmm.$(SLIBEXT) ]; then \
		mkdir -p $(PREFIX)/$(POUTDIR)/mic; \
		$(CP) -v $(OUTDIR)/mic/libxsmm.$(SLIBEXT) $(PREFIX)/$(POUTDIR)/mic; \
	fi
	@echo
	@echo "LIBXSMM installing pkg-config and module files..."
	@mkdir -p $(PREFIX)/$(PPKGDIR)
	@$(CP) -v $(OUTDIR)/*.pc $(PREFIX)/$(PPKGDIR) 2>/dev/null || true
	@mkdir -p $(PREFIX)/$(PMODDIR)
	@if [ ! -e $(PREFIX)/$(PMODDIR)/module ]; then \
		$(CP) -v $(OUTDIR)/module $(PREFIX)/$(PMODDIR)/libxsmm 2>/dev/null || true; \
	fi
	@echo
	@echo "LIBXSMM installing stand-alone generators..."
	@$(CP) -v $(BINDIR)/libxsmm_*_generator $(PREFIX)/$(PBINDIR) 2>/dev/null || true
	@echo
	@echo "LIBXSMM installing interface..."
	@$(CP) -v $(INCDIR)/libxsmm*.h $(PREFIX)/$(PINCDIR) 2>/dev/null || true
	@$(CP) -v $(INCDIR)/libxsmm.f $(PREFIX)/$(PINCDIR) 2>/dev/null || true
	@$(CP) -v $(INCDIR)/*.mod* $(PREFIX)/$(PINCDIR) 2>/dev/null || true
	@echo
	@echo "LIBXSMM installing header-only..."
	@$(CP) -r $(ROOTDIR)/$(SRCDIR)/* $(PREFIX)/$(PSRCDIR) >/dev/null 2>/dev/null || true
endif
1597
# install: install-minimal plus documentation (PDF and Markdown files),
# SECURITY.md, version.txt, and a build.txt derived from $(DIRSTATE)/.state.
# Nothing is installed if PREFIX equals ABSDIR.
.PHONY: install
install: install-minimal
ifneq ($(PREFIX),$(ABSDIR))
	@echo; echo "LIBXSMM installing documentation..."
	@mkdir -p $(PREFIX)/$(PDOCDIR)
# the copies are mandatory: the first failing copy fails the recipe
	@$(CP) -v $(ROOTDIR)/$(DOCDIR)/*.pdf $(PREFIX)/$(PDOCDIR) \
	&& $(CP) -v $(ROOTDIR)/$(DOCDIR)/*.md $(PREFIX)/$(PDOCDIR) \
	&& $(CP) -v $(ROOTDIR)/SECURITY.md $(PREFIX)/$(PDOCDIR) \
	&& $(CP) -v $(ROOTDIR)/version.txt $(PREFIX)/$(PDOCDIR)
# build.txt: state file with the quoting and the STATIC entry removed (best-effort)
	@sed "s/^\"//;s/\\\n\"$$//;/STATIC=/d" $(DIRSTATE)/.state > $(PREFIX)/$(PDOCDIR)/build.txt 2>/dev/null || true
	@mkdir -p $(PREFIX)/$(LICFDIR)
# the license is only moved if LICFDIR/LICFILE denotes another location/name
ifneq ($(call qapath,$(PREFIX)/$(PDOCDIR)/LICENSE.md),$(call qapath,$(PREFIX)/$(LICFDIR)/$(LICFILE)))
	@$(MV) $(PREFIX)/$(PDOCDIR)/LICENSE.md $(PREFIX)/$(LICFDIR)/$(LICFILE)
endif
endif
1614
# install-all: install plus selected sample binaries and scripts, which are
# copied into $(PREFIX)/$(PBINDIR). Every copy is best-effort, i.e., samples
# which do not exist are skipped without an error.
# Nothing is installed if PREFIX equals ABSDIR.
.PHONY: install-all
install-all: install
ifneq ($(PREFIX),$(ABSDIR))
	@echo; echo "LIBXSMM installing samples..."
# one copy per sample directory
	@$(CP) -v $(addprefix $(ROOTDIR)/$(SPLDIR)/cp2k/,cp2k cp2k.sh cp2k-perf* cp2k-plot.sh) $(PREFIX)/$(PBINDIR) 2>/dev/null || true
	@$(CP) -v $(addprefix $(ROOTDIR)/$(SPLDIR)/wrap/,dgemm-blas dgemm-blas.sh dgemm-wrap dgemm-wrap.sh wrap-test.sh) $(PREFIX)/$(PBINDIR) 2>/dev/null || true
	@$(CP) -v $(addprefix $(ROOTDIR)/$(SPLDIR)/dispatch/,dispatch dispatch.sh) $(PREFIX)/$(PBINDIR) 2>/dev/null || true
	@$(CP) -v $(addprefix $(ROOTDIR)/$(SPLDIR)/nek/,axhm grad rstr *.sh) $(PREFIX)/$(PBINDIR) 2>/dev/null || true
	@$(CP) -v $(addprefix $(ROOTDIR)/$(SPLDIR)/smm/,smm smm.sh smm-perf* smmf-perf.sh smm-plot.sh specialized specialized.sh dispatched dispatched.sh inlined inlined.sh blas blas.sh) $(PREFIX)/$(PBINDIR) 2>/dev/null || true
endif
1630
# install-dev: install-all plus the tests (requires build-tests). For every
# C source in the test directory, the file named like the source but without
# the ".c" extension is copied into $(PREFIX)/$(PTSTDIR) (best-effort).
# Nothing is installed if PREFIX equals ABSDIR.
.PHONY: install-dev
install-dev: install-all build-tests
ifneq ($(PREFIX),$(ABSDIR))
	@echo; echo "LIBXSMM installing tests..."
	@mkdir -p $(PREFIX)/$(PTSTDIR)
	@$(CP) -v $(patsubst %.c,%,$(wildcard $(ROOTDIR)/$(TSTDIR)/*.c)) $(PREFIX)/$(PTSTDIR) 2>/dev/null || true
endif
1639
# install-artifacts: install-dev plus a verbatim copy of the state file
# ($(DIRSTATE)/.state), which is stored as artifacts/make.txt below the
# documentation directory. Nothing is installed if PREFIX equals ABSDIR.
.PHONY: install-artifacts
install-artifacts: install-dev
ifneq ($(PREFIX),$(ABSDIR))
	@echo; echo "LIBXSMM installing artifacts..."
	@mkdir -p $(PREFIX)/$(PDOCDIR)/artifacts \
	&& $(CP) -v $(DIRSTATE)/.state $(PREFIX)/$(PDOCDIR)/artifacts/make.txt
endif
1648
# ALIAS_PRIVLIBS: private link dependencies written into libxsmm.pc.
# The value depends on UNAME; it is not assigned for Darwin.
ifeq (Windows_NT,$(UNAME))
  ALIAS_PRIVLIBS = $(call ldlib,$(LD),$(SLDFLAGS),dbghelp)
else ifeq (Darwin,$(UNAME))
  # intentionally no assignment
else ifeq (DragonFly,$(UNAME))
  ALIAS_PRIVLIBS = $(LIBDL) $(LIBM) $(LIBC)
else
  ALIAS_PRIVLIBS = $(LIBPTHREAD) $(LIBRT) $(LIBDL) $(LIBM) $(LIBC)
endif
# ALIAS_PRIVLIBS_EXT: private link flags written into libxsmmext.pc
# (assigned for every UNAME except Darwin).
ifneq (Darwin,$(UNAME))
  ALIAS_PRIVLIBS_EXT = -fopenmp
endif
1661
# alias_pcdir: rewrites a directory ($(1)) for the use in a pkg-config file.
# The directory is temporarily prefixed by a marker (two dollar characters):
# - marker followed by ALIAS_PREFIX is replaced by an escaped ${prefix} reference,
# - a remaining marker is removed if the directory starts with "/", otherwise
#   it is replaced by an escaped ${prefix} reference followed by "/".
# The backslash-escaped reference is meant to be echoed inside of double quotes.
alias_pcdir = $(subst $$$$,$(if $(findstring $$$$/,$$$$$(1)),,\$${prefix}/),$(subst $$$$$(ALIAS_PREFIX),\$${prefix},$$$$$(1)))

ALIAS_INCLUDEDIR = $(call alias_pcdir,$(PINCDIR))
ALIAS_LIBDIR = $(call alias_pcdir,$(POUTDIR))
1664
# pkg-config file of the main library. If private libraries are defined and the
# shared library exists, they are listed as Libs.private; without the shared
# library, they are appended to Libs.
$(OUTDIR)/libxsmm.pc: $(OUTDIR)/libxsmm.$(LIBEXT)
	@{ \
		echo "Name: libxsmm"; \
		echo "Description: Matrix operations and deep learning primitives"; \
		echo "URL: https://github.com/hfp/libxsmm"; \
		echo "Version: $(VERSION_STRING)"; \
		echo; \
		echo "prefix=$(ALIAS_PREFIX)"; \
		echo "includedir=$(ALIAS_INCLUDEDIR)"; \
		echo "libdir=$(ALIAS_LIBDIR)"; \
		echo; \
		echo "Cflags: -I\$${includedir}"; \
	} > $@
ifeq (,$(ALIAS_PRIVLIBS))
# no private libraries
	@echo "Libs: -L\$${libdir} -lxsmm" >> $@
else
	@if [ ! -e $(OUTDIR)/libxsmm.$(DLIBEXT) ]; then \
		echo "Libs: -L\$${libdir} -lxsmm $(ALIAS_PRIVLIBS)" >> $@; \
	else \
		{ \
			echo "Libs: -L\$${libdir} -lxsmm"; \
			echo "Libs.private: $(ALIAS_PRIVLIBS)"; \
		} >> $@; \
	fi
endif
1686
# pkg-config file of the Fortran library (requires libxsmm).
$(OUTDIR)/libxsmmf.pc: $(OUTDIR)/libxsmmf.$(LIBEXT)
	@{ \
		echo "Name: libxsmm/f"; \
		echo "Description: LIBXSMM for Fortran"; \
		echo "URL: https://github.com/hfp/libxsmm"; \
		echo "Version: $(VERSION_STRING)"; \
		echo; \
		echo "prefix=$(ALIAS_PREFIX)"; \
		echo "includedir=$(ALIAS_INCLUDEDIR)"; \
		echo "libdir=$(ALIAS_LIBDIR)"; \
		echo; \
		echo "Requires: libxsmm"; \
		echo "Cflags: -I\$${includedir}"; \
		echo "Libs: -L\$${libdir} -lxsmmf"; \
	} > $@
1700
# pkg-config file of the extension library (requires libxsmm). If private
# flags are defined and the shared library exists, they are listed as
# Libs.private; without the shared library, they are appended to Libs.
$(OUTDIR)/libxsmmext.pc: $(OUTDIR)/libxsmmext.$(LIBEXT)
	@{ \
		echo "Name: libxsmm/ext"; \
		echo "Description: LIBXSMM/multithreaded for OpenMP"; \
		echo "URL: https://github.com/hfp/libxsmm"; \
		echo "Version: $(VERSION_STRING)"; \
		echo; \
		echo "prefix=$(ALIAS_PREFIX)"; \
		echo "includedir=$(ALIAS_INCLUDEDIR)"; \
		echo "libdir=$(ALIAS_LIBDIR)"; \
		echo; \
		echo "Requires: libxsmm"; \
		echo "Cflags: -I\$${includedir}"; \
	} > $@
ifeq (,$(ALIAS_PRIVLIBS_EXT))
# no private libraries
	@echo "Libs: -L\$${libdir} -lxsmmext" >> $@
else
	@if [ ! -e $(OUTDIR)/libxsmmext.$(DLIBEXT) ]; then \
		echo "Libs: -L\$${libdir} -lxsmmext $(ALIAS_PRIVLIBS_EXT)" >> $@; \
	else \
		{ \
			echo "Libs: -L\$${libdir} -lxsmmext"; \
			echo "Libs.private: $(ALIAS_PRIVLIBS_EXT)"; \
		} >> $@; \
	fi
endif
1723
# pkg-config file of the library substituting the LAPACK/BLAS dependency
# (requires libxsmm).
$(OUTDIR)/libxsmmnoblas.pc: $(OUTDIR)/libxsmmnoblas.$(LIBEXT)
	@{ \
		echo "Name: libxsmm/noblas"; \
		echo "Description: LIBXSMM substituted LAPACK/BLAS dependency"; \
		echo "URL: https://github.com/hfp/libxsmm"; \
		echo "Version: $(VERSION_STRING)"; \
		echo; \
		echo "prefix=$(ALIAS_PREFIX)"; \
		echo "includedir=$(ALIAS_INCLUDEDIR)"; \
		echo "libdir=$(ALIAS_LIBDIR)"; \
		echo; \
		echo "Requires: libxsmm"; \
		echo "Cflags: -I\$${includedir}"; \
		echo "Libs: -L\$${libdir} -lxsmmnoblas"; \
	} > $@
1737
# Environment module file: sets PREFIX to ALIAS_PREFIX and prepends the bin,
# lib, and include sub-directories of PREFIX to the respective search paths.
$(OUTDIR)/module: $(OUTDIR)/.make $(INCDIR)/libxsmm.h
	@{ \
		echo "#%Module1.0"; \
		echo; \
		echo "module-whatis \"LIBXSMM $(VERSION_STRING)\""; \
		echo; \
		echo "set PREFIX \"$(ALIAS_PREFIX)\""; \
		echo "prepend-path PATH \"\$$PREFIX/bin\""; \
		echo "prepend-path LD_LIBRARY_PATH \"\$$PREFIX/lib\""; \
		echo; \
		echo "prepend-path PKG_CONFIG_PATH \"\$$PREFIX/lib\""; \
		echo "prepend-path LIBRARY_PATH \"\$$PREFIX/lib\""; \
		echo "prepend-path CPATH \"\$$PREFIX/include\""; \
	} > $@
# deb: builds a Debian package from a git-archive of $(VERSION_RELEASE).
# The recipe is a single shell script, which
# - determines the most recent tag (git describe) and derives the package name
#   from it (scripts/libxsmm_utilities.py),
# - uses git-config's user.name/user.email as maintainer (warns if incomplete),
# - creates the .orig.tar.gz (unless it exists already) and unpacks it,
# - generates debian/source/format, control, changelog, rules, compat, and
#   copyright; description and release notes are queried from the GitHub API
#   using wget (timeout: $(TIMEOUT)),
# - runs debuild (unsigned) with PREFIX, PDOCDIR, LICFILE, LICFDIR, SONAMELNK,
#   SHARED, and SYM passed via environment.
# If Git or the version information is unavailable, only an error message is
# printed (the recipe itself does not fail).
# The Vcs-Git field refers to the clone URL of the repository.
.PHONY: deb
deb:
	@if [ "" != "$$(command -v git)" ]; then \
		VERSION_ARCHIVE=$$(git describe --tags --abbrev=0 2>/dev/null); \
		VERSION_ARCHIVE_SONAME=$$($(PYTHON) $(ROOTDIR)/$(SCRDIR)/libxsmm_utilities.py 0 $${VERSION_ARCHIVE}); \
	fi; \
	if [ "" != "$${VERSION_ARCHIVE}" ] && [ "" != "$${VERSION_ARCHIVE_SONAME}" ]; then \
		ARCHIVE_AUTHOR_NAME="$$(git config user.name)"; \
		ARCHIVE_AUTHOR_MAIL="$$(git config user.email)"; \
		ARCHIVE_NAME=libxsmm$${VERSION_ARCHIVE_SONAME}; \
		ARCHIVE_DATE="$$(LANG=C date -R)"; \
		if [ "" != "$${ARCHIVE_AUTHOR_NAME}" ] && [ "" != "$${ARCHIVE_AUTHOR_MAIL}" ]; then \
			ARCHIVE_AUTHOR="$${ARCHIVE_AUTHOR_NAME} <$${ARCHIVE_AUTHOR_MAIL}>"; \
		else \
			echo "Warning: Please git-config user.name and user.email!"; \
			if [ "" != "$${ARCHIVE_AUTHOR_NAME}" ] || [ "" != "$${ARCHIVE_AUTHOR_MAIL}" ]; then \
				ARCHIVE_AUTHOR="$${ARCHIVE_AUTHOR_NAME}$${ARCHIVE_AUTHOR_MAIL}"; \
			fi \
		fi; \
		if ! [ -e $${ARCHIVE_NAME}_$${VERSION_ARCHIVE}.orig.tar.gz ]; then \
			git archive --prefix $${ARCHIVE_NAME}-$${VERSION_ARCHIVE}/ \
				-o $${ARCHIVE_NAME}_$${VERSION_ARCHIVE}.orig.tar.gz $(VERSION_RELEASE); \
		fi; \
		tar xf $${ARCHIVE_NAME}_$${VERSION_ARCHIVE}.orig.tar.gz; \
		cd $${ARCHIVE_NAME}-$${VERSION_ARCHIVE}; \
		mkdir -p debian/source; cd debian/source; \
		echo "3.0 (quilt)" > format; \
		cd ..; \
		echo "Source: $${ARCHIVE_NAME}" > control; \
		echo "Section: libs" >> control; \
		echo "Homepage: https://github.com/hfp/libxsmm" >> control; \
		echo "Vcs-Git: https://github.com/hfp/libxsmm.git" >> control; \
		echo "Maintainer: $${ARCHIVE_AUTHOR}" >> control; \
		echo "Priority: optional" >> control; \
		echo "Build-Depends: debhelper (>= 9)" >> control; \
		echo "Standards-Version: 3.9.8" >> control; \
		echo >> control; \
		echo "Package: $${ARCHIVE_NAME}" >> control; \
		echo "Section: libs" >> control; \
		echo "Architecture: amd64" >> control; \
		echo "Depends: \$${shlibs:Depends}, \$${misc:Depends}" >> control; \
		echo "Description: Matrix operations and deep learning primitives" >> control; \
		wget -T $(TIMEOUT) -qO- https://api.github.com/repos/hfp/libxsmm \
		| sed -n 's/ *\"description\": \"\(..*\)\".*/\1/p' \
		| fold -s -w 79 | sed -e 's/^/ /' -e 's/[[:space:]][[:space:]]*$$//' >> control; \
		echo "$${ARCHIVE_NAME} ($${VERSION_ARCHIVE}-$(VERSION_PACKAGE)) UNRELEASED; urgency=low" > changelog; \
		echo >> changelog; \
		wget -T $(TIMEOUT) -qO- https://api.github.com/repos/hfp/libxsmm/releases/tags/$${VERSION_ARCHIVE} \
		| sed -n 's/ *\"body\": \"\(..*\)\".*/\1/p' \
		| sed -e 's/\\r\\n/\n/g' -e 's/\\"/"/g' -e 's/\[\([^]]*\)\]([^)]*)/\1/g' \
		| sed -n 's/^\* \(..*\)/\* \1/p' \
		| fold -s -w 78 | sed -e 's/^/  /g' -e 's/^  \* /\* /' -e 's/^/  /' -e 's/[[:space:]][[:space:]]*$$//' >> changelog; \
		echo >> changelog; \
		echo " -- $${ARCHIVE_AUTHOR}  $${ARCHIVE_DATE}" >> changelog; \
		echo "#!/usr/bin/make -f" > rules; \
		echo "export DH_VERBOSE = 1" >> rules; \
		echo >> rules; \
		echo "%:" >> rules; \
		$$(which echo) -e "\tdh \$$@" >> rules; \
		echo >> rules; \
		echo "override_dh_auto_install:" >> rules; \
		$$(which echo) -e "\tdh_auto_install -- prefix=/usr" >> rules; \
		echo >> rules; \
		echo "9" > compat; \
		$(CP) ../LICENSE.md copyright; \
		rm -f ../$(TSTDIR)/mhd_test.mhd; \
		chmod +x rules; \
		debuild \
			-e PREFIX=debian/$${ARCHIVE_NAME}/usr \
			-e PDOCDIR=share/doc/$${ARCHIVE_NAME} \
			-e LICFILE=copyright \
			-e LICFDIR=../.. \
			-e SONAMELNK=1 \
			-e SHARED=1 \
			-e SYM=1 \
			-us -uc; \
	else \
		echo "Error: Git is unavailable or make-deb runs outside of cloned repository!"; \
	fi
1830
1831