# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#-------------------------------------------------------------------------------
# Template configuration for compiling MXNet
#
# If you want to change the configuration, please use the following steps.
# Assume you are on the root directory of mxnet. First copy this file so that
# any local changes will be ignored by git
#
#   $ cp config/linux_arm.cmake config.cmake
#
# Next modify the according entries, and then compile by
#
#   $ mkdir build; cd build
#   $ cmake ..
#   $ cmake --build .
#
# Specify `cmake --build . --parallel N` to set the number of parallel
# compilation jobs. Default is derived from CPUs available.
#
#-------------------------------------------------------------------------------

#---------------------------------------------
# Arm flags
#---------------------------------------------
# Set the correct C and CXX flags according to your Arm processor's
# architecture, e.g. "armv8-a"
set(CFLAGS "-march=armv8-a" CACHE STRING "CFLAGS")
set(CXXFLAGS "-march=armv8-a" CACHE STRING "CXXFLAGS")

#---------------------------------------------
# GPU support
#---------------------------------------------
set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
set(USE_CUDNN OFF CACHE BOOL "Build with cudnn support, if found")

# Target NVIDIA GPU architecture.
# Valid options are "Auto" for autodetection, "All" for all available
# architectures or a list of architectures by compute capability number, such as
# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
# should instead specify the target architecture manually to avoid excessive
# compilation times.
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU achitecture")

#---------------------------------------------
# Common libraries
#---------------------------------------------
set(USE_BLAS "open" CACHE STRING "BLAS Vendor")

set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
# Fix: OPENCV_ROOT holds a directory, so cache it as PATH (was mistyped as BOOL).
set(OPENCV_ROOT "" CACHE PATH "OpenCV install path. Supports autodetection.")

set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")

set(USE_MKL_IF_AVAILABLE ON CACHE BOOL "Use Intel MKL if found")
set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")

set(USE_LAPACK ON CACHE BOOL "Build with lapack support")

set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")

# Integrate MKLDNN with Arm Performance Libraries
# Note that APL needs to be added to LD_LIBRARY_PATH
set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")

# Integrate MKLDNN with Arm Compute Library
# NOTE: $ENV{} is evaluated at configure time only; export ACL_ROOT_DIR (or set
# it here) before running cmake when enabling MKLDNN_USE_ACL.
set(ENV{ACL_ROOT_DIR} "")
set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")

#---------------------
# Compilers
#--------------------
set(CMAKE_GENERATOR "Ninja" CACHE STRING "Build Tool Generator used by CMake")

# Compilers are usually autodetected. Uncomment and modify the next 3 lines to
# choose manually (compiler locations are file paths, hence CACHE FILEPATH):

# set(CMAKE_C_COMPILER "" CACHE FILEPATH "C compiler")
# set(CMAKE_CXX_COMPILER "" CACHE FILEPATH "C++ compiler")
# set(CMAKE_CUDA_COMPILER "" CACHE FILEPATH "Cuda compiler (nvcc)")

# Uncomment the following line to compile with debug information
# set(CMAKE_BUILD_TYPE Debug CACHE STRING "CMake build type")

#---------------------------------------------
# CPU instruction sets: The support is autodetected if turned ON
#---------------------------------------------
set(USE_SSE OFF CACHE BOOL "Build with x86 SSE instruction support")
set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")


#----------------------------
# distributed computing
#----------------------------
set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")


#----------------------------
# performance settings
#----------------------------
set(USE_OPERATOR_TUNING ON CACHE BOOL "Enable auto-tuning of operators")
set(USE_GPERFTOOLS OFF CACHE BOOL "Build with GPerfTools support")
set(USE_JEMALLOC OFF CACHE BOOL "Build with Jemalloc support")


#----------------------------
# additional operators
#----------------------------
# path to folders containing projects specific operators that you don't want to
# put in src/operators
set(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")


#----------------------------
# other features
#----------------------------
# Create C++ interface package
set(USE_CPP_PACKAGE OFF CACHE BOOL "Build C++ Package")

# Use int64_t type to represent the total number of elements in a tensor
# This will cause performance degradation reported in issue #14496
# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
# Note: the size of each dimension is still bounded by INT32_MAX
set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")

# Other GPU features
# Fix: arguments were in the wrong order — set(USE_NCCL "<doc>" OFF) made
# USE_NCCL a two-element (truthy) list instead of an OFF BOOL cache entry.
set(USE_NCCL OFF CACHE BOOL "Use NVidia NCCL with CUDA")
# Fix: NCCL_ROOT holds a directory, so cache it as PATH (was mistyped as BOOL).
set(NCCL_ROOT "" CACHE PATH "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC OFF CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX OFF CACHE BOOL "Build with NVTX support")