1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18#---------------------------------------------------------------------------------------
19#  mshadow configuration script
20#
21#  include mshadow.mk after the variables are set
22#
23#  Add MSHADOW_CFLAGS to the compile flags
24#  Add MSHADOW_LDFLAGS to the linker flags
25#  Add MSHADOW_NVCCFLAGS to the nvcc compile flags
26#----------------------------------------------------------------------------------------
27
# Baseline values of the three exported flag variables.  They are kept
# recursively expanded (plain '=') and every section below only appends
# to them with '+='.
MSHADOW_NVCCFLAGS =
MSHADOW_LDFLAGS = -lm
MSHADOW_CFLAGS = -funroll-loops
MSHADOW_CFLAGS += -Wno-unused-parameter -Wno-unknown-pragmas -Wno-unused-local-typedefs
31
32
# The ATLAS BLAS library has a different name on CentOS, so choose the
# linker flags for USE_BLAS=atlas from the distribution release string.
#
# The release string is stored in a private variable instead of OS: on
# Windows the environment provides OS=Windows_NT, and the USE_F16C
# detection further down in this file (as well as including makefiles)
# tests $(OS) for exactly that value.  Overwriting OS here would make
# those tests meaningless.
# The file is absent on systems that do not ship /etc/system-release; the
# error is discarded and the generic library name is used.
mshadow_system_release := $(shell cat /etc/system-release 2>/dev/null)
ifneq ($(findstring CentOS,$(mshadow_system_release)),)
  ATLAS_LDFLAGS := -lsatlas -L/usr/lib64/atlas
else
  ATLAS_LDFLAGS := -lcblas
endif
40
# SSE support: enabled unless USE_SSE was given a non-empty value before
# this file is included.
ifndef USE_SSE
  USE_SSE = 1
endif

# Any value other than 1 turns the SSE code paths off via the preprocessor;
# 1 asks the compiler for SSE3 code generation.
ifneq ($(USE_SSE), 1)
  MSHADOW_CFLAGS += -DMSHADOW_USE_SSE=0
else
  MSHADOW_CFLAGS += -msse3
endif
50
# Whether to use the F16C instruction set extension for fast fp16 compute
# on CPU.  When USE_F16C is not set before this file is included, it is
# auto-detected from the CPU of the machine running make.  If cross
# compiling you may want to turn it off explicitly (USE_F16C=0) when the
# target system does not support it.
ifndef USE_F16C
  ifneq ($(OS),Windows_NT)
    detected_OS := $(shell uname -s)
    # Start from "not detected" so that an unrecognised OS, or a value
    # inherited from the environment, can never enable F16C by accident.
    F16C_SUPP :=
    # ':=' runs each probe exactly once at parse time; probe errors (for
    # example a missing sysctl key or /proc file) are discarded and simply
    # count as "not supported".
    ifeq ($(detected_OS),Darwin)
      F16C_SUPP := $(shell sysctl -n machdep.cpu.features 2>/dev/null | grep F16C)
    endif
    ifeq ($(detected_OS),Linux)
      F16C_SUPP := $(shell grep flags /proc/cpuinfo 2>/dev/null | grep f16c)
    endif
    ifneq ($(strip $(F16C_SUPP)),)
      USE_F16C = 1
    else
      USE_F16C = 0
    endif
  endif
  # If OS is Windows, check whether your processor and compiler support the
  # F16C architecture.  One way to check the processor is to download the
  # tool https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
  # If "coreinfo -c" shows F16C and the compiler supports it, you can set
  # USE_F16C=1 explicitly to leverage that capability.
endif

# Anything other than USE_F16C=1 (including unset) disables the F16C code
# paths through the preprocessor.
ifeq ($(USE_F16C), 1)
  MSHADOW_CFLAGS += -mf16c
else
  MSHADOW_CFLAGS += -DMSHADOW_USE_F16C=0
endif
80
# CUDA: only an explicit USE_CUDA=0 disables it; any other value (including
# unset) links the CUDA runtime libraries.
ifeq ($(USE_CUDA), 0)
  MSHADOW_CFLAGS += -DMSHADOW_USE_CUDA=0
else
  MSHADOW_LDFLAGS += -lcudart -lcublas -lcurand -lcusolver
endif

# Add the toolkit search paths only when USE_CUDA_PATH names a real
# directory.  An unset or empty USE_CUDA_PATH is treated like NONE (the
# same way RABIT_PATH is handled in this file); otherwise the flags would
# degenerate to -I/include, -L/lib64 and -L/lib.
ifneq ($(strip $(USE_CUDA_PATH)),)
  ifneq ($(USE_CUDA_PATH), NONE)
    MSHADOW_CFLAGS += -I$(USE_CUDA_PATH)/include
    MSHADOW_LDFLAGS += -L$(USE_CUDA_PATH)/lib64 -L$(USE_CUDA_PATH)/lib
  endif
endif
90
# Intel MKL back end (USE_BLAS=mkl).  Any other back end except NONE is
# compiled as a plain CBLAS implementation with MKL switched off.
ifeq ($(USE_BLAS), mkl)
  # Search paths below the Intel installation root, when one is given.
  ifneq ($(USE_INTEL_PATH), NONE)
    UNAME_S := $(shell uname -s)
    ifneq ($(UNAME_S),Darwin)
      MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/mkl/lib/intel64
      MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/compiler/lib/intel64
      MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/lib/intel64
    else
      MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/mkl/lib
      MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/lib
    endif
    MSHADOW_CFLAGS += -I$(USE_INTEL_PATH)/mkl/include
  endif

  # Static linking is used for every USE_STATIC_MKL value other than NONE.
  # MKLROOT falls back to /opt/intel/mkl when no Intel path was configured.
  ifneq ($(USE_STATIC_MKL), NONE)
    ifneq ($(USE_INTEL_PATH), NONE)
      MKLROOT = $(USE_INTEL_PATH)/mkl
    else
      MKLROOT = /opt/intel/mkl
    endif
    MSHADOW_LDFLAGS += -L$(MKLROOT)/../compiler/lib/intel64 \
      -Wl,--start-group \
      $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a \
      $(MKLROOT)/lib/intel64/libmkl_core.a \
      $(MKLROOT)/lib/intel64/libmkl_intel_thread.a \
      -Wl,--end-group \
      -liomp5 -ldl -lpthread -lm
  else ifneq ($(USE_MKLML), 1)
    # Dynamic MKL; skipped when USE_MKLML=1, whose libraries are added by
    # the USE_MKLML section of this file.
    MSHADOW_LDFLAGS += -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5
  endif
else ifneq ($(USE_BLAS), NONE)
  MSHADOW_CFLAGS += -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0
endif
121
# MKLML: headers and libraries are looked up below MKLROOT.  Darwin links
# the single mklml library; every other system links the intel and gnu
# variants behind --as-needed.
ifeq ($(USE_MKLML), 1)
  MSHADOW_CFLAGS += -I$(MKLROOT)/include
  ifeq ($(shell uname),Darwin)
    MSHADOW_LDFLAGS += -lmklml
  else
    MSHADOW_LDFLAGS += -Wl,--as-needed -lmklml_intel -lmklml_gnu
  endif
  MSHADOW_LDFLAGS += -liomp5 -L$(MKLROOT)/lib/
endif
131
# Link line for the non-MKL BLAS back ends.  The branches are mutually
# exclusive; a USE_BLAS value not listed here adds nothing.
ifeq ($(USE_BLAS), apple)
  # Accelerate framework: needs the vecLib headers as well as the framework.
  MSHADOW_CFLAGS += -I/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Versions/Current/Headers/
  MSHADOW_LDFLAGS += -framework Accelerate
else ifeq ($(USE_BLAS), atlas)
  # Library name was chosen by the ATLAS detection earlier in this file.
  MSHADOW_LDFLAGS += $(ATLAS_LDFLAGS)
else ifeq ($(USE_BLAS), blas)
  MSHADOW_LDFLAGS += -lblas
else ifeq ($(USE_BLAS), openblas)
  MSHADOW_LDFLAGS += -lopenblas
else ifeq ($(USE_BLAS), perfblas)
  MSHADOW_LDFLAGS += -lperfblas
endif
144
# Parameter-server locations.  The sentinel NONE selects the defaults: the
# parent directory for PS_PATH and its third_party subdirectory for
# PS_THIRD_PATH (resolved lazily, so it follows the final PS_PATH).
ifeq (NONE,$(PS_PATH))
  PS_PATH = ..
endif
ifeq (NONE,$(PS_THIRD_PATH))
  PS_THIRD_PATH = $(PS_PATH)/third_party
endif
151
# RABIT_PATH falls back to the local "rabit" directory both when it holds
# the sentinel NONE and when it is unset or empty.
ifeq (NONE,$(RABIT_PATH))
  RABIT_PATH = rabit
endif

ifndef RABIT_PATH
  RABIT_PATH = rabit
endif
159
# Rabit-based parameter server: MSHADOW_RABIT_PS is always defined, as 1
# only when USE_RABIT_PS=1, in which case the rabit headers and the
# rabit_base library are added as well.
ifneq ($(USE_RABIT_PS),1)
  MSHADOW_CFLAGS += -DMSHADOW_RABIT_PS=0
else
  MSHADOW_CFLAGS += -I$(RABIT_PATH)/include -DMSHADOW_RABIT_PS=1
  MSHADOW_LDFLAGS += -L$(RABIT_PATH)/lib -lrabit_base
endif
167
# Distributed parameter server: MSHADOW_DIST_PS is always defined, as 1
# only when USE_DIST_PS=1.  In that case C++11 is requested from both the
# host compiler and nvcc, the ps include paths are added, and PS_LIB lists
# the static archives to link (the ps libraries plus their third-party
# dependencies).
ifneq ($(USE_DIST_PS),1)
  MSHADOW_CFLAGS += -DMSHADOW_DIST_PS=0
else
  MSHADOW_CFLAGS += -DMSHADOW_DIST_PS=1 -std=c++11
  MSHADOW_CFLAGS += -I$(PS_PATH)/src -I$(PS_THIRD_PATH)/include
  MSHADOW_NVCCFLAGS += --std=c++11
  PS_LIB = $(addprefix $(PS_PATH)/build/, libps.a libps_main.a)
  PS_LIB += $(addprefix $(PS_THIRD_PATH)/lib/, libgflags.a libzmq.a libprotobuf.a libglog.a libz.a libsnappy.a)
  # Dynamic-link alternative for the third-party libraries:
  # -L$(PS_THIRD_PATH)/lib -lgflags -lzmq -lprotobuf -lglog -lz -lsnappy
endif
179
# Legacy switch: MSHADOW_USE_PASCAL=1 used to enable true-fp16 gemms.
# mshadow now only uses pseudo-fp16 gemms, so the macro is pinned to 0 and
# will be removed once dependent projects no longer reference it.
MSHADOW_CFLAGS += -DMSHADOW_USE_PASCAL=0
184