1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18#-------------------------------------------------------------------------------
19#  Template configuration for compiling MXNet
20#
21#  If you want to change the configuration, please use the following steps.
22#  Assume you are on the root directory of mxnet. First copy this file so that
23#  any local changes will be ignored by git
24#
25#  $ cp config/linux_arm.cmake config.cmake
26#
#  Next modify the corresponding entries, and then compile by
28#
29#  $ mkdir build; cd build
30#  $ cmake ..
31#  $ cmake --build .
32#
33# Specify `cmake --build . --parallel N` to set the number of parallel compilation jobs.
34# Default is derived from CPUs available.
35#
36#-------------------------------------------------------------------------------
37
#---------------------------------------------
# Arm flags
#---------------------------------------------
# Set the correct C and CXX flags according to your Arm processor's
# architecture, e.g. "armv8-a".
# NOTE(review): CFLAGS/CXXFLAGS are project-specific cache entries, not the
# standard CMAKE_C_FLAGS/CMAKE_CXX_FLAGS -- confirm against the top-level
# CMakeLists before renaming them.
set(CFLAGS "-march=armv8-a" CACHE STRING "CFLAGS")
set(CXXFLAGS "-march=armv8-a" CACHE STRING "CXXFLAGS")
45
#---------------------------------------------
# GPU support
#---------------------------------------------
set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
set(USE_CUDNN OFF CACHE BOOL "Build with cudnn support, if found")

# Target NVIDIA GPU architecture.
# Valid options are "Auto" for autodetection, "All" for all available
# architectures or a list of architectures by compute capability number, such as
# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
# should instead specify the target architecture manually to avoid excessive
# compilation times.
# (Typo fixed: "achitecture" -> "architecture" in the cache help string below.)
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU architecture")
63
#---------------------------------------------
# Common libraries
#---------------------------------------------
set(USE_BLAS "open" CACHE STRING "BLAS Vendor")

set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
# Cache type fixed: this entry holds a directory path, so it must be declared
# PATH, not BOOL (BOOL would present a checkbox in cmake-gui and coerce any
# non-empty path to a truthy flag).
set(OPENCV_ROOT "" CACHE PATH "OpenCV install path. Supports autodetection.")

set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")

set(USE_MKL_IF_AVAILABLE ON CACHE BOOL "Use Intel MKL if found")
set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")

set(USE_LAPACK ON CACHE BOOL "Build with lapack support")

set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")

# Integrate MKLDNN with Arm Performance Libraries
# Note that APL needs to be added to LD_LIBRARY_PATH
set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")

# Integrate MKLDNN with Arm Compute Library.
# NOTE: $ENV{...} is only read at configure time; export ACL_ROOT_DIR (or fill
# in the value below) before running cmake if MKLDNN_USE_ACL is enabled.
set(ENV{ACL_ROOT_DIR} "")
set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
88
#---------------------
# Compilers
#--------------------
set(CMAKE_GENERATOR "Ninja" CACHE STRING "Build Tool Generator used by CMake")

# Compilers are usually autodetected. Uncomment and modify the next 3 lines to
# choose manually. (The examples use CACHE FILEPATH -- a compiler entry holds a
# path to an executable, not a boolean.)

# set(CMAKE_C_COMPILER "" CACHE FILEPATH "C compiler")
# set(CMAKE_CXX_COMPILER "" CACHE FILEPATH "C++ compiler")
# set(CMAKE_CUDA_COMPILER "" CACHE FILEPATH "Cuda compiler (nvcc)")

# Uncomment the following line to compile with debug information
# set(CMAKE_BUILD_TYPE Debug CACHE STRING "CMake build type")
103
#---------------------------------------------
# CPU instruction sets: The support is autodetected if turned ON
#---------------------------------------------
# SSE and F16C are x86 instruction sets; they default to OFF in this Arm
# template.
set(USE_SSE OFF CACHE BOOL "Build with x86 SSE instruction support")
set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
109
110
#----------------------------
# distributed computing
#----------------------------
# Distributed key-value store support for multi-machine training.
set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")
115
116
#----------------------------
# performance settings
#----------------------------
# Auto-tuning of operators (see the cache help string below).
set(USE_OPERATOR_TUNING ON CACHE BOOL  "Enable auto-tuning of operators")
# Optional profiling/allocator libraries; both default to OFF.
set(USE_GPERFTOOLS OFF CACHE BOOL "Build with GPerfTools support")
set(USE_JEMALLOC OFF CACHE BOOL "Build with Jemalloc support")
123
124
#----------------------------
# additional operators
#----------------------------
# Path to folders containing project-specific operators that should not be
# placed in src/operators.
set(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
131
132
#----------------------------
# other features
#----------------------------
# Create C++ interface package
set(USE_CPP_PACKAGE OFF CACHE BOOL "Build C++ Package")

# Use int64_t type to represent the total number of elements in a tensor
# This will cause performance degradation reported in issue #14496
# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
# Note: the size of each dimension is still bounded by INT32_MAX
set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")

# Other GPU features
# Fixed: the previous form `set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)`
# created a plain (non-cache) variable holding two values instead of a BOOL
# cache option; declare it like the other USE_* entries.
set(USE_NCCL OFF CACHE BOOL "Use NVidia NCCL with CUDA")
# Cache type fixed: this entry holds a directory path, so PATH, not BOOL.
set(NCCL_ROOT "" CACHE PATH "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC OFF CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX OFF CACHE BOOL "Build with NVTX support")
150