# 'make' builds libHalide.a, the internal test suite, and runs the internal test suite
# 'make run_tests' builds and runs all the end-to-end tests in the test subdirectory
# 'make {error,performance}_foo' builds and runs test/{...}/foo.cpp for any
#     c_source file in the corresponding subdirectory of the test folder
# 'make correctness_foo' builds and runs test/correctness/foo.cpp for any
#     c_source file in the correctness/ subdirectory of the test folder
# 'make test_apps' checks some of the apps build and run (but does not check their output)
# 'make time_compilation_tests' records the compile time for each test module into a csv file.
#     For correctness and performance tests this includes halide build time and run time. For
#     the tests in test/generator/ this times only the halide build time.

# Disable built-in makefile rules for all apps to avoid pointless file-system
# scanning and general weirdness resulting from implicit rules.
MAKEFLAGS += --no-builtin-rules
.SUFFIXES:

# Several recipes in this file write their target through a shell redirection
# ('> $@') or run more than one command after creating it. Without this, a
# failing recipe leaves a partial target behind that looks up to date on the
# next run.
.DELETE_ON_ERROR:

# Evaluated once; UNAME is tested by many conditionals below.
UNAME := $(shell uname)

ifeq ($(OS), Windows_NT)
    $(error Halide no longer supports the MinGW environment.)
else
    # let's assume "normal" UNIX such as linux
    COMMON_LD_FLAGS=$(LDFLAGS) -ldl -lpthread -lz
    FPIC=-fPIC
ifeq ($(UNAME), Darwin)
    SHARED_EXT=dylib
else
    SHARED_EXT=so
endif
endif

ifeq ($(UNAME), Darwin)
    # Anything that we use install_name_tool on needs these linker flags
    # to ensure there is enough padding for install_name_tool to use
    INSTALL_NAME_TOOL_LD_FLAGS=-Wl,-headerpad_max_install_names
else
    INSTALL_NAME_TOOL_LD_FLAGS=
endif

# $(call alwayslink,<archive>) expands to the linker flags that pull in every
# member of <archive>, using the spelling of the host platform's linker.
ifeq ($(UNAME), Darwin)
define alwayslink
	-Wl,-force_load,$(1)
endef
else
define alwayslink
	-Wl,--whole-archive $(1) -Wl,-no-whole-archive
endef
endif

SHELL = bash
CXX ?= g++
PREFIX ?= /usr/local
LLVM_CONFIG ?= llvm-config
LLVM_COMPONENTS= $(shell $(LLVM_CONFIG) --components)
# "major.minor" as reported by llvm-config.
LLVM_VERSION = $(shell $(LLVM_CONFIG) --version | sed 's/\([0-9][0-9]*\)\.\([0-9]\).*/\1.\2/')

LLVM_FULL_VERSION = $(shell $(LLVM_CONFIG) --version)
# The sed expressions turn backslashes into forward slashes and rewrite a
# leading drive letter ("C:") as "/C".
LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')
LLVM_LIBDIR = $(shell $(LLVM_CONFIG) --libdir | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')
# Apparently there is no llvm_config flag to get canonical paths to tools,
# so we'll just construct one relative to --src-root and hope that is stable everywhere.
LLVM_GIT_LLD_INCLUDE_DIR = $(shell $(LLVM_CONFIG) --src-root | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')/../lld/include
LLVM_SYSTEM_LIBS = $(shell $(LLVM_CONFIG) --system-libs --link-static | sed -e 's/[\/&]/\\&/g')
LLVM_AS = $(LLVM_BINDIR)/llvm-as
LLVM_NM = $(LLVM_BINDIR)/llvm-nm
# llvm-config's C++ flags minus its optimisation, debug and warning choices,
# plus the lld include directory.
LLVM_CXX_FLAGS = -std=c++11 $(filter-out -O% -g -fomit-frame-pointer -pedantic -W% -W, $(shell $(LLVM_CONFIG) --cxxflags | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g;s/-D/ -D/g;s/-O/ -O/g')) -I$(LLVM_GIT_LLD_INCLUDE_DIR)
OPTIMIZE ?= -O3
OPTIMIZE_FOR_BUILD_TIME ?= -O0

PYTHON ?= python3

CLANG ?= $(LLVM_BINDIR)/clang
CLANG_VERSION = $(shell $(CLANG) --version)

SANITIZER_FLAGS ?=

# TODO: this is suboptimal hackery; we should really add the relevant
# support libs for the sanitizer(s) as weak symbols in Codegen_LLVM.
# (Note also that, in general, most Sanitizers work most reliably with an all-Clang
# build system.)

ifneq (,$(findstring tsan,$(HL_TARGET)$(HL_JIT_TARGET)))

# Note that attempting to use TSAN with the JIT can produce false positives
# if libHalide is not also compiled with TSAN enabled; we tack the relevant
# flag onto OPTIMIZE here, but that's really only effective if you ensure
# to do a clean build before testing. (In general, most of the Sanitizers
# only work well when used in a completely clean environment.)
OPTIMIZE += -fsanitize=thread
SANITIZER_FLAGS += -fsanitize=thread

endif

ifneq (,$(findstring asan,$(HL_TARGET)$(HL_JIT_TARGET)))
OPTIMIZE += -fsanitize=address
SANITIZER_FLAGS += -fsanitize=address
endif

COMMON_LD_FLAGS += $(SANITIZER_FLAGS)

# Major and minor digits concatenated, passed to the C++ code as LLVM_VERSION.
LLVM_VERSION_TIMES_10 = $(shell $(LLVM_CONFIG) --version | sed 's/\([0-9][0-9]*\)\.\([0-9]\).*/\1\2/')

LLVM_CXX_FLAGS += -DLLVM_VERSION=$(LLVM_VERSION_TIMES_10)

# All WITH_* flags are either empty or not-empty. They do not behave
# like true/false values in most languages. To turn one off, either
# edit this file, add "WITH_FOO=" (no assigned value) to the make
# line, or define an environment variable WITH_FOO that has an empty
# value.
WITH_X86 ?= $(findstring x86, $(LLVM_COMPONENTS))
WITH_ARM ?= $(findstring arm, $(LLVM_COMPONENTS))
WITH_HEXAGON ?= $(findstring hexagon, $(LLVM_COMPONENTS))
WITH_MIPS ?= $(findstring mips, $(LLVM_COMPONENTS))
WITH_RISCV ?= $(findstring riscv, $(LLVM_COMPONENTS))
WITH_AARCH64 ?= $(findstring aarch64, $(LLVM_COMPONENTS))
WITH_POWERPC ?= $(findstring powerpc, $(LLVM_COMPONENTS))
WITH_NVPTX ?= $(findstring nvptx, $(LLVM_COMPONENTS))
# AMDGPU target is WIP
WITH_AMDGPU ?= $(findstring amdgpu, $(LLVM_COMPONENTS))
WITH_OPENCL ?= not-empty
WITH_METAL ?= not-empty
WITH_OPENGL ?= not-empty
WITH_D3D12 ?= not-empty
WITH_INTROSPECTION ?= not-empty
WITH_EXCEPTIONS ?=
WITH_LLVM_INSIDE_SHARED_LIBHALIDE ?= not-empty

# If HL_TARGET or HL_JIT_TARGET aren't set, use host
HL_TARGET ?= host
HL_JIT_TARGET ?= host

# For each backend: FOO_CXX_FLAGS is the -DWITH_FOO define handed to the
# compiler, FOO_LLVM_CONFIG_LIB is the component name handed to llvm-config.
# Both are blank when the matching WITH_FOO switch is empty.
X86_CXX_FLAGS=$(if $(WITH_X86), -DWITH_X86, )
X86_LLVM_CONFIG_LIB=$(if $(WITH_X86), x86, )

ARM_CXX_FLAGS=$(if $(WITH_ARM), -DWITH_ARM, )
ARM_LLVM_CONFIG_LIB=$(if $(WITH_ARM), arm, )

MIPS_CXX_FLAGS=$(if $(WITH_MIPS), -DWITH_MIPS, )
MIPS_LLVM_CONFIG_LIB=$(if $(WITH_MIPS), mips, )

POWERPC_CXX_FLAGS=$(if $(WITH_POWERPC), -DWITH_POWERPC, )
POWERPC_LLVM_CONFIG_LIB=$(if $(WITH_POWERPC), powerpc, )

PTX_CXX_FLAGS=$(if $(WITH_NVPTX), -DWITH_NVPTX, )
PTX_LLVM_CONFIG_LIB=$(if $(WITH_NVPTX), nvptx, )
PTX_DEVICE_INITIAL_MODULES=$(if $(WITH_NVPTX), libdevice.compute_20.10.bc libdevice.compute_30.10.bc libdevice.compute_35.10.bc, )

AMDGPU_CXX_FLAGS=$(if $(WITH_AMDGPU), -DWITH_AMDGPU, )
AMDGPU_LLVM_CONFIG_LIB=$(if $(WITH_AMDGPU), amdgpu, )
# TODO add bitcode files

OPENCL_CXX_FLAGS=$(if $(WITH_OPENCL), -DWITH_OPENCL, )
OPENCL_LLVM_CONFIG_LIB=$(if $(WITH_OPENCL), , )

METAL_CXX_FLAGS=$(if $(WITH_METAL), -DWITH_METAL, )
METAL_LLVM_CONFIG_LIB=$(if $(WITH_METAL), , )

OPENGL_CXX_FLAGS=$(if $(WITH_OPENGL), -DWITH_OPENGL, )

D3D12_CXX_FLAGS=$(if $(WITH_D3D12), -DWITH_D3D12, )
D3D12_LLVM_CONFIG_LIB=$(if $(WITH_D3D12), , )

AARCH64_CXX_FLAGS=$(if $(WITH_AARCH64), -DWITH_AARCH64, )
AARCH64_LLVM_CONFIG_LIB=$(if $(WITH_AARCH64), aarch64, )

RISCV_CXX_FLAGS=$(if $(WITH_RISCV), -DWITH_RISCV, )
RISCV_LLVM_CONFIG_LIB=$(if $(WITH_RISCV), riscv, )

INTROSPECTION_CXX_FLAGS=$(if $(WITH_INTROSPECTION), -DWITH_INTROSPECTION, )
EXCEPTIONS_CXX_FLAGS=$(if $(WITH_EXCEPTIONS), -DHALIDE_WITH_EXCEPTIONS -fexceptions, )

HEXAGON_CXX_FLAGS=$(if $(WITH_HEXAGON), -DWITH_HEXAGON, )
HEXAGON_LLVM_CONFIG_LIB=$(if $(WITH_HEXAGON), hexagon, )

# RTTI defaults to on unless llvm-config's flags contain -fno-rtti.
LLVM_HAS_NO_RTTI = $(findstring -fno-rtti, $(LLVM_CXX_FLAGS))
WITH_RTTI ?= $(if $(LLVM_HAS_NO_RTTI),, not-empty)
RTTI_CXX_FLAGS=$(if $(WITH_RTTI), , -fno-rtti )

CXX_VERSION = $(shell $(CXX) --version | head -n1)
CXX_WARNING_FLAGS = -Wall -Werror -Wno-unused-function -Wcast-qual -Wignored-qualifiers -Wno-comment -Wsign-compare -Wno-unknown-warning-option -Wno-psabi

# -Wsuggest-override is added only when $(CXX) identifies itself as g++ and
# its version is at least 5.1.
ifneq (,$(findstring g++,$(CXX_VERSION)))
GCC_MAJOR_VERSION := $(shell $(CXX) -dumpfullversion -dumpversion | cut -f1 -d.)
GCC_MINOR_VERSION := $(shell $(CXX) -dumpfullversion -dumpversion | cut -f2 -d.)
ifeq (1,$(shell expr $(GCC_MAJOR_VERSION) \> 5 \| $(GCC_MAJOR_VERSION) = 5 \& $(GCC_MINOR_VERSION) \>= 1))
CXX_WARNING_FLAGS += -Wsuggest-override
endif
endif

# With clang, carry over -stdlib=libc++ when llvm-config's flags request it.
ifneq (,$(findstring clang,$(CXX_VERSION)))
LLVM_CXX_FLAGS_LIBCPP := $(findstring -stdlib=libc++, $(LLVM_CXX_FLAGS))
endif

CXX_FLAGS = $(CXXFLAGS) $(CXX_WARNING_FLAGS) $(RTTI_CXX_FLAGS) -Woverloaded-virtual $(FPIC) $(OPTIMIZE) -fno-omit-frame-pointer -DCOMPILING_HALIDE

# LLVM's own flags first, then one define per enabled backend or feature.
CXX_FLAGS += \
  $(LLVM_CXX_FLAGS) \
  $(PTX_CXX_FLAGS) \
  $(ARM_CXX_FLAGS) \
  $(HEXAGON_CXX_FLAGS) \
  $(AARCH64_CXX_FLAGS) \
  $(X86_CXX_FLAGS) \
  $(OPENCL_CXX_FLAGS) \
  $(METAL_CXX_FLAGS) \
  $(OPENGL_CXX_FLAGS) \
  $(D3D12_CXX_FLAGS) \
  $(MIPS_CXX_FLAGS) \
  $(POWERPC_CXX_FLAGS) \
  $(INTROSPECTION_CXX_FLAGS) \
  $(EXCEPTIONS_CXX_FLAGS) \
  $(AMDGPU_CXX_FLAGS) \
  $(RISCV_CXX_FLAGS)

# This is required on some hosts like powerpc64le-linux-gnu because we may build
# everything with -fno-exceptions. Without -funwind-tables, libHalide.so fails
# to propagate exceptions and causes a test failure.
CXX_FLAGS += -funwind-tables

# Debug helper: 'make print-FOO' echoes the value of variable FOO.
print-%: ; @echo '$*=$($*)'

# LLVM components handed to llvm-config to obtain the static libraries.
# NOTE(review): WEBASSEMBLY_LLVM_CONFIG_LIB is not assigned in the part of
# this file visible here; unless it is set elsewhere it expands to nothing.
LLVM_STATIC_LIBFILES = \
  bitwriter \
  bitreader \
  linker \
  ipo \
  passes \
  mcjit \
  $(X86_LLVM_CONFIG_LIB) \
  $(ARM_LLVM_CONFIG_LIB) \
  $(OPENCL_LLVM_CONFIG_LIB) \
  $(METAL_LLVM_CONFIG_LIB) \
  $(PTX_LLVM_CONFIG_LIB) \
  $(AARCH64_LLVM_CONFIG_LIB) \
  $(MIPS_LLVM_CONFIG_LIB) \
  $(POWERPC_LLVM_CONFIG_LIB) \
  $(HEXAGON_LLVM_CONFIG_LIB) \
  $(AMDGPU_LLVM_CONFIG_LIB) \
  $(WEBASSEMBLY_LLVM_CONFIG_LIB) \
  $(RISCV_LLVM_CONFIG_LIB)

LLVM_STATIC_LIBS = -L $(LLVM_LIBDIR) $(shell $(LLVM_CONFIG) --link-static --libfiles $(LLVM_STATIC_LIBFILES) | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')

# Add a rpath to the llvm used for linking, in case multiple llvms are
# installed. Bakes a path on the build system into the .so, so don't
# use this config for distributions.
LLVM_SHARED_LIBS = -Wl,-rpath=$(LLVM_LIBDIR) -L $(LLVM_LIBDIR) -lLLVM

# Static LLVM when WITH_LLVM_INSIDE_SHARED_LIBHALIDE is non-empty, otherwise
# the shared libLLVM.
LLVM_LIBS_FOR_SHARED_LIBHALIDE=$(if $(WITH_LLVM_INSIDE_SHARED_LIBHALIDE),$(LLVM_STATIC_LIBS),$(LLVM_SHARED_LIBS))

TUTORIAL_CXX_FLAGS ?= -std=c++11 -g -fno-omit-frame-pointer $(RTTI_CXX_FLAGS) -I $(ROOT_DIR)/tools $(SANITIZER_FLAGS) $(LLVM_CXX_FLAGS_LIBCPP)
# The tutorials contain example code with warnings that we don't want
# to be flagged as errors, so the test flags are the tutorial flags
# plus our warning flags.
# Also allow tests, via conditional compilation, to use the entire
# capability of the CPU being compiled on via -march=native. This
# presumes tests are run on the same machine they are compiled on.
ARCH_FOR_TESTS ?= native
TEST_CXX_FLAGS ?= $(TUTORIAL_CXX_FLAGS) $(CXX_WARNING_FLAGS) -march=$(ARCH_FOR_TESTS)
TEST_LD_FLAGS = -L$(BIN_DIR) -lHalide $(COMMON_LD_FLAGS)

# In the tests, some of our expectations change depending on the llvm version
TEST_CXX_FLAGS += -DLLVM_VERSION=$(LLVM_VERSION_TIMES_10)

# gcc 4.8 fires a bogus warning on old versions of png.h
ifneq (,$(findstring g++,$(CXX_VERSION)))
ifneq (,$(findstring 4.8,$(CXX_VERSION)))
TEST_CXX_FLAGS += -Wno-literal-suffix
endif
endif

ifeq ($(UNAME), Linux)
TEST_LD_FLAGS += -rdynamic -Wl,--rpath=$(CURDIR)/$(BIN_DIR)
endif

# When LLVM is linked as a shared library, tests need its directory on the rpath.
ifeq ($(WITH_LLVM_INSIDE_SHARED_LIBHALIDE), )
TEST_LD_FLAGS += -Wl,--rpath=$(LLVM_LIBDIR)
endif

# A GPU API is tested only when its backend is enabled and HL_TARGET names it
# ("ptx" or "cuda" both select CUDA).
ifneq ($(WITH_NVPTX), )
ifneq (,$(findstring ptx,$(HL_TARGET))$(findstring cuda,$(HL_TARGET)))
TEST_CUDA = 1
endif
endif

ifneq ($(WITH_OPENCL), )
ifneq (,$(findstring opencl,$(HL_TARGET)))
TEST_OPENCL = 1
endif
endif

ifneq ($(WITH_METAL), )
ifneq (,$(findstring metal,$(HL_TARGET)))
TEST_METAL = 1
endif
endif

# Per-OS linker flags for the GPU APIs under test.
ifeq ($(UNAME), Linux)
ifneq ($(TEST_CUDA), )
CUDA_LD_FLAGS ?= -L/usr/lib/nvidia-current -lcuda
endif
ifneq ($(TEST_OPENCL), )
OPENCL_LD_FLAGS ?= -lOpenCL
endif
OPENGL_LD_FLAGS ?= -lGL
HOST_OS=linux
endif

ifeq ($(UNAME), Darwin)
# Someone with an osx box with cuda installed please fix the line below
ifneq ($(TEST_CUDA), )
CUDA_LD_FLAGS ?= -L/usr/local/cuda/lib -lcuda
endif
ifneq ($(TEST_OPENCL), )
OPENCL_LD_FLAGS ?= -framework OpenCL
endif
ifneq ($(TEST_METAL), )
METAL_LD_FLAGS ?= -framework Metal -framework Foundation
endif
OPENGL_LD_FLAGS ?= -framework OpenGL
HOST_OS=os_x
endif

# Tell the test sources which GPU APIs are being exercised.
ifneq ($(TEST_OPENCL), )
TEST_CXX_FLAGS += -DTEST_OPENCL
endif

ifneq ($(TEST_METAL), )
TEST_CXX_FLAGS += -DTEST_METAL
endif

ifneq ($(TEST_CUDA), )
TEST_CXX_FLAGS += -DTEST_CUDA
TEST_CXX_FLAGS += -I/usr/local/cuda/include
endif

# Compiling the tutorials requires libpng
LIBPNG_LIBS_DEFAULT = $(shell libpng-config --ldflags)
LIBPNG_CXX_FLAGS ?= $(shell libpng-config --cflags)
# Workaround for libpng-config pointing to 64-bit versions on linux even when we're building for 32-bit
ifneq (,$(findstring -m32,$(CXX)))
ifneq (,$(findstring x86_64,$(LIBPNG_LIBS_DEFAULT)))
LIBPNG_LIBS ?= -lpng
endif
endif
LIBPNG_LIBS ?= $(LIBPNG_LIBS_DEFAULT)

# Workaround brew Cellar path for libpng-config output.
LIBJPEG_LINKER_PATH ?= $(shell echo $(LIBPNG_LIBS_DEFAULT) | sed -e'/-L.*[/][Cc]ellar[/]libpng/!d;s=\(.*\)/[Cc]ellar/libpng/.*=\1/lib=')
LIBJPEG_LIBS ?= $(LIBJPEG_LINKER_PATH) -ljpeg

# There's no libjpeg-config, unfortunately. We should look for
# jpeglib.h one directory level up from png.h . Also handle
# Mac OS brew installs where libpng-config returns paths
# into the PNG cellar.
LIBPNG_INCLUDE_DIRS = $(filter -I%,$(LIBPNG_CXX_FLAGS))
LIBJPEG_CXX_FLAGS ?= $(shell echo $(LIBPNG_INCLUDE_DIRS) | sed -e'/[Cc]ellar[/]libpng/!s=\(.*\)=\1/..=;s=\(.*\)/[Cc]ellar/libpng/.*=\1/include=')

IMAGE_IO_LIBS = $(LIBPNG_LIBS) $(LIBJPEG_LIBS)
IMAGE_IO_CXX_FLAGS = $(LIBPNG_CXX_FLAGS) $(LIBJPEG_CXX_FLAGS)

# We're building into the current directory $(CURDIR). Find the Halide
# repo root directory (the location of the makefile)
THIS_MAKEFILE = $(realpath $(filter %Makefile, $(MAKEFILE_LIST)))
ROOT_DIR = $(strip $(shell dirname $(THIS_MAKEFILE)))
SRC_DIR = $(ROOT_DIR)/src

TARGET=$(if $(HL_TARGET),$(HL_TARGET),host)

# The following directories are all relative to the output directory (i.e. $(CURDIR), not $(SRC_DIR))
LIB_DIR = lib
BIN_DIR = bin
DISTRIB_DIR = distrib
INCLUDE_DIR = include
SHARE_DIR = share
DOC_DIR = $(SHARE_DIR)/doc/Halide
BUILD_DIR = $(BIN_DIR)/build
FILTERS_DIR = $(BIN_DIR)/$(TARGET)/build
TMP_DIR = $(BUILD_DIR)/tmp
HEXAGON_RUNTIME_LIBS_DIR = src/runtime/hexagon_remote/bin
HEXAGON_RUNTIME_LIBS = \
  $(HEXAGON_RUNTIME_LIBS_DIR)/arm-32-android/libhalide_hexagon_host.so \
  $(HEXAGON_RUNTIME_LIBS_DIR)/arm-64-android/libhalide_hexagon_host.so \
  $(HEXAGON_RUNTIME_LIBS_DIR)/host/libhalide_hexagon_host.so \
  $(HEXAGON_RUNTIME_LIBS_DIR)/v62/hexagon_sim_remote \
  $(HEXAGON_RUNTIME_LIBS_DIR)/v62/libhalide_hexagon_remote_skel.so \
  $(HEXAGON_RUNTIME_LIBS_DIR)/v62/signed_by_debug/libhalide_hexagon_remote_skel.so

# Keep this list sorted in alphabetical order.
SOURCE_FILES = \
  AddAtomicMutex.cpp \
  AddImageChecks.cpp \
  AddParameterChecks.cpp \
  AlignLoads.cpp \
  AllocationBoundsInference.cpp \
  ApplySplit.cpp \
  Argument.cpp \
  AssociativeOpsTable.cpp \
  Associativity.cpp \
  AsyncProducers.cpp \
  AutoSchedule.cpp \
  AutoScheduleUtils.cpp \
  BoundaryConditions.cpp \
  Bounds.cpp \
  BoundsInference.cpp \
  BoundSmallAllocations.cpp \
  Buffer.cpp \
  CanonicalizeGPUVars.cpp \
  Closure.cpp \
  CodeGen_ARM.cpp \
  CodeGen_C.cpp \
  CodeGen_D3D12Compute_Dev.cpp \
  CodeGen_GPU_Dev.cpp \
  CodeGen_GPU_Host.cpp \
  CodeGen_Hexagon.cpp \
  CodeGen_Internal.cpp \
  CodeGen_LLVM.cpp \
  CodeGen_Metal_Dev.cpp \
  CodeGen_MIPS.cpp \
  CodeGen_OpenCL_Dev.cpp \
  CodeGen_OpenGL_Dev.cpp \
  CodeGen_OpenGLCompute_Dev.cpp \
  CodeGen_Posix.cpp \
  CodeGen_PowerPC.cpp \
  CodeGen_PTX_Dev.cpp \
  CodeGen_PyTorch.cpp \
  CodeGen_RISCV.cpp \
  CodeGen_WebAssembly.cpp \
  CodeGen_X86.cpp \
  CompilerLogger.cpp \
  CPlusPlusMangle.cpp \
  CSE.cpp \
  Debug.cpp \
  DebugArguments.cpp \
  DebugToFile.cpp \
  Definition.cpp \
  Deinterleave.cpp \
  Derivative.cpp \
  DerivativeUtils.cpp \
  DeviceArgument.cpp \
  DeviceInterface.cpp \
  Dimension.cpp \
  EarlyFree.cpp \
  Elf.cpp \
  EliminateBoolVectors.cpp \
  EmulateFloat16Math.cpp \
  Error.cpp \
  Expr.cpp \
  FastIntegerDivide.cpp \
  FindCalls.cpp \
  Float16.cpp \
  Func.cpp \
  Function.cpp \
  FuseGPUThreadLoops.cpp \
  FuzzFloatStores.cpp \
  Generator.cpp \
  HexagonOffload.cpp \
  HexagonOptimize.cpp \
  ImageParam.cpp \
  InferArguments.cpp \
  InjectHostDevBufferCopies.cpp \
  InjectOpenGLIntrinsics.cpp \
  Inline.cpp \
  InlineReductions.cpp \
  IntegerDivisionTable.cpp \
  Interval.cpp \
  Introspection.cpp \
  IR.cpp \
  IREquality.cpp \
  IRMatch.cpp \
  IRMutator.cpp \
  IROperator.cpp \
  IRPrinter.cpp \
  IRVisitor.cpp \
  JITModule.cpp \
  Lerp.cpp \
  LICM.cpp \
  LLVM_Output.cpp \
  LLVM_Runtime_Linker.cpp \
  LoopCarry.cpp \
  Lower.cpp \
  LowerWarpShuffles.cpp \
  MatlabWrapper.cpp \
  Memoization.cpp \
  Module.cpp \
  ModulusRemainder.cpp \
  Monotonic.cpp \
  ObjectInstanceRegistry.cpp \
  OutputImageParam.cpp \
  ParallelRVar.cpp \
  Parameter.cpp \
  ParamMap.cpp \
  PartitionLoops.cpp \
  Pipeline.cpp \
  Prefetch.cpp \
  PrintLoopNest.cpp \
  Profiling.cpp \
  PurifyIndexMath.cpp \
  PythonExtensionGen.cpp \
  Qualify.cpp \
  Random.cpp \
  RDom.cpp \
  Realization.cpp \
  RealizationOrder.cpp \
  Reduction.cpp \
  RegionCosts.cpp \
  RemoveDeadAllocations.cpp \
  RemoveExternLoops.cpp \
  RemoveUndef.cpp \
  Schedule.cpp \
  ScheduleFunctions.cpp \
  SelectGPUAPI.cpp \
  Simplify.cpp \
  Simplify_Add.cpp \
  Simplify_And.cpp \
  Simplify_Call.cpp \
  Simplify_Cast.cpp \
  Simplify_Div.cpp \
  Simplify_EQ.cpp \
  Simplify_Exprs.cpp \
  Simplify_Let.cpp \
  Simplify_LT.cpp \
  Simplify_Max.cpp \
  Simplify_Min.cpp \
  Simplify_Mod.cpp \
  Simplify_Mul.cpp \
  Simplify_Not.cpp \
  Simplify_Or.cpp \
  Simplify_Select.cpp \
  Simplify_Shuffle.cpp \
  Simplify_Stmts.cpp \
  Simplify_Sub.cpp \
  SimplifyCorrelatedDifferences.cpp \
  SimplifySpecializations.cpp \
  SkipStages.cpp \
  SlidingWindow.cpp \
  Solve.cpp \
  SplitTuples.cpp \
  StmtToHtml.cpp \
  StorageFlattening.cpp \
  StorageFolding.cpp \
  StrictifyFloat.cpp \
  Substitute.cpp \
  Target.cpp \
  Tracing.cpp \
  TrimNoOps.cpp \
  Tuple.cpp \
  Type.cpp \
  UnifyDuplicateLets.cpp \
  UniquifyVariableNames.cpp \
  UnpackBuffers.cpp \
  UnrollLoops.cpp \
  UnsafePromises.cpp \
  Util.cpp \
  Var.cpp \
  VaryingAttributes.cpp \
  VectorizeLoops.cpp \
  WasmExecutor.cpp \
  WrapCalls.cpp

# The externally-visible header files that go into making Halide.h.
# Don't include anything here that includes llvm headers.
# Keep this list sorted in alphabetical order.
HEADER_FILES = \
  AddAtomicMutex.h \
  AddImageChecks.h \
  AddParameterChecks.h \
  AlignLoads.h \
  AllocationBoundsInference.h \
  ApplySplit.h \
  Argument.h \
  AssociativeOpsTable.h \
  Associativity.h \
  AsyncProducers.h \
  AutoSchedule.h \
  AutoScheduleUtils.h \
  BoundaryConditions.h \
  Bounds.h \
  BoundsInference.h \
  BoundSmallAllocations.h \
  Buffer.h \
  CanonicalizeGPUVars.h \
  Closure.h \
  CodeGen_ARM.h \
  CodeGen_C.h \
  CodeGen_D3D12Compute_Dev.h \
  CodeGen_GPU_Dev.h \
  CodeGen_GPU_Host.h \
  CodeGen_Internal.h \
  CodeGen_LLVM.h \
  CodeGen_Metal_Dev.h \
  CodeGen_MIPS.h \
  CodeGen_OpenCL_Dev.h \
  CodeGen_OpenGL_Dev.h \
  CodeGen_OpenGLCompute_Dev.h \
  CodeGen_Posix.h \
  CodeGen_PowerPC.h \
  CodeGen_PTX_Dev.h \
  CodeGen_PyTorch.h \
  CodeGen_RISCV.h \
  CodeGen_WebAssembly.h \
  CodeGen_X86.h \
  CompilerLogger.h \
  ConciseCasts.h \
  CPlusPlusMangle.h \
  CSE.h \
  Debug.h \
  DebugArguments.h \
  DebugToFile.h \
  Definition.h \
  Deinterleave.h \
  Derivative.h \
  DerivativeUtils.h \
  DeviceAPI.h \
  DeviceArgument.h \
  DeviceInterface.h \
  Dimension.h \
  EarlyFree.h \
  Elf.h \
  EliminateBoolVectors.h \
  EmulateFloat16Math.h \
  Error.h \
  Expr.h \
  ExprUsesVar.h \
  Extern.h \
  ExternFuncArgument.h \
  FastIntegerDivide.h \
  FindCalls.h \
  Float16.h \
  Func.h \
  Function.h \
  FunctionPtr.h \
  FuseGPUThreadLoops.h \
  FuzzFloatStores.h \
  Generator.h \
  HexagonOffload.h \
  HexagonOptimize.h \
  ImageParam.h \
  InferArguments.h \
  InjectHostDevBufferCopies.h \
  InjectOpenGLIntrinsics.h \
  Inline.h \
  InlineReductions.h \
  IntegerDivisionTable.h \
  Interval.h \
  Introspection.h \
  IntrusivePtr.h \
  IR.h \
  IREquality.h \
  IRMatch.h \
  IRMutator.h \
  IROperator.h \
  IRPrinter.h \
  IRVisitor.h \
  WasmExecutor.h \
  JITModule.h \
  Lambda.h \
  Lerp.h \
  LICM.h \
  LLVM_Output.h \
  LLVM_Runtime_Linker.h \
  LoopCarry.h \
  Lower.h \
  LowerWarpShuffles.h \
  MainPage.h \
  MatlabWrapper.h \
  Memoization.h \
  Module.h \
  ModulusRemainder.h \
  Monotonic.h \
  ObjectInstanceRegistry.h \
  OutputImageParam.h \
  ParallelRVar.h \
  Param.h \
  Parameter.h \
  ParamMap.h \
  PartitionLoops.h \
  Pipeline.h \
  Prefetch.h \
  Profiling.h \
  PurifyIndexMath.h \
  PythonExtensionGen.h \
  Qualify.h \
  Random.h \
  Realization.h \
  RDom.h \
  RealizationOrder.h \
  Reduction.h \
  RegionCosts.h \
  RemoveDeadAllocations.h \
  RemoveExternLoops.h \
  RemoveUndef.h \
  runtime/HalideBuffer.h \
  runtime/HalideRuntime.h \
  Schedule.h \
  ScheduleFunctions.h \
  Scope.h \
  SelectGPUAPI.h \
  Simplify.h \
  SimplifyCorrelatedDifferences.h \
  SimplifySpecializations.h \
  SkipStages.h \
  SlidingWindow.h \
  Solve.h \
  SplitTuples.h \
  StmtToHtml.h \
  StorageFlattening.h \
  StorageFolding.h \
  StrictifyFloat.h \
  Substitute.h \
  Target.h \
  ThreadPool.h \
  Tracing.h \
  TrimNoOps.h \
  Tuple.h \
  Type.h \
  UnifyDuplicateLets.h \
  UniquifyVariableNames.h \
  UnpackBuffers.h \
  UnrollLoops.h \
  UnsafePromises.h \
  Util.h \
  Var.h \
  VaryingAttributes.h \
  VectorizeLoops.h \
  WrapCalls.h

# Object files live in $(BUILD_DIR); headers are referenced in $(SRC_DIR).
OBJECTS = $(SOURCE_FILES:%.cpp=$(BUILD_DIR)/%.o)
HEADERS = $(HEADER_FILES:%.h=$(SRC_DIR)/%.h)

# Runtime modules written in C++ (src/runtime/<name>.cpp).
RUNTIME_CPP_COMPONENTS = \
  aarch64_cpu_features \
  alignment_128 \
  alignment_32 \
  allocation_cache \
  alignment_64 \
  android_clock \
  android_host_cpu_count \
  android_io \
  arm_cpu_features \
  cache \
  can_use_target \
  cuda \
  destructors \
  device_interface \
  errors \
  fake_get_symbol \
  fake_thread_pool \
  float16_t \
  fuchsia_clock \
  fuchsia_host_cpu_count \
  fuchsia_yield \
  gpu_device_selection \
  halide_buffer_t \
  hexagon_cache_allocator \
  hexagon_cpu_features \
  hexagon_dma_pool \
  hexagon_dma \
  hexagon_host \
  ios_io \
  linux_clock \
  linux_host_cpu_count \
  linux_yield \
  matlab \
  metadata \
  metal \
  metal_objc_arm \
  metal_objc_x86 \
  mips_cpu_features \
  module_aot_ref_count \
  module_jit_ref_count \
  msan \
  msan_stubs \
  opencl \
  opengl \
  openglcompute \
  opengl_egl_context \
  opengl_glx_context \
  osx_clock \
  osx_get_symbol \
  osx_host_cpu_count \
  osx_opengl_context \
  osx_yield \
  posix_abort \
  posix_allocator \
  posix_clock \
  posix_error_handler \
  posix_get_symbol \
  posix_io \
  posix_print \
  posix_threads \
  posix_threads_tsan \
  powerpc_cpu_features \
  prefetch \
  profiler \
  profiler_inlined \
  pseudostack \
  qurt_allocator \
  qurt_hvx \
  qurt_hvx_vtcm \
  qurt_init_fini \
  qurt_threads \
  qurt_threads_tsan \
  qurt_yield \
  riscv_cpu_features \
  runtime_api \
  ssp \
  to_string \
  trace_helper \
  tracing \
  wasm_cpu_features \
  windows_abort \
  windows_clock \
  windows_cuda \
  windows_d3d12compute_x86 \
  windows_get_symbol \
  windows_io \
  windows_opencl \
  windows_profiler \
  windows_threads \
  windows_threads_tsan \
  windows_yield \
  write_debug_image \
  x86_cpu_features

# Runtime modules written directly in LLVM IR (src/runtime/<name>.ll).
RUNTIME_LL_COMPONENTS = \
  aarch64 \
  arm \
  arm_no_neon \
  hvx_64 \
  hvx_128 \
  mips \
  posix_math \
  powerpc \
  ptx_dev \
  wasm_math \
  win32_math \
  x86 \
  x86_avx \
  x86_avx2 \
  x86_sse41

RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \
                            $(INCLUDE_DIR)/HalideRuntimeD3D12Compute.h \
                            $(INCLUDE_DIR)/HalideRuntimeCuda.h \
                            $(INCLUDE_DIR)/HalideRuntimeHexagonDma.h \
                            $(INCLUDE_DIR)/HalideRuntimeHexagonHost.h \
                            $(INCLUDE_DIR)/HalideRuntimeOpenCL.h \
                            $(INCLUDE_DIR)/HalideRuntimeOpenGL.h \
                            $(INCLUDE_DIR)/HalideRuntimeOpenGLCompute.h \
                            $(INCLUDE_DIR)/HalideRuntimeMetal.h \
                            $(INCLUDE_DIR)/HalideRuntimeQurt.h \
                            $(INCLUDE_DIR)/HalideBuffer.h \
                            $(INCLUDE_DIR)/HalidePyTorchHelpers.h \
                            $(INCLUDE_DIR)/HalidePyTorchCudaHelpers.h

# Every embedded runtime object: each C++ component in 32/64-bit and
# release/debug flavours, the exported headers, the inlined C snippet, the
# .ll components and the PTX libdevice bitcode.
INITIAL_MODULES = $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_32.o) \
                  $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_64.o) \
                  $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_32_debug.o) \
                  $(RUNTIME_CPP_COMPONENTS:%=$(BUILD_DIR)/initmod.%_64_debug.o) \
                  $(RUNTIME_EXPORTED_INCLUDES:$(INCLUDE_DIR)/%.h=$(BUILD_DIR)/initmod.%_h.o) \
                  $(BUILD_DIR)/initmod.inlined_c.o \
                  $(RUNTIME_LL_COMPONENTS:%=$(BUILD_DIR)/initmod.%_ll.o) \
                  $(PTX_DEVICE_INITIAL_MODULES:libdevice.%.bc=$(BUILD_DIR)/initmod_ptx.%_ll.o)

# Add the Hexagon simulator to the rpath on Linux. (Not supported elsewhere, so no else cases.)
ifeq ($(UNAME), Linux)
ifneq (,$(WITH_HEXAGON))
ifneq (,$(HL_HEXAGON_TOOLS))
TEST_LD_FLAGS += -Wl,--rpath=$(ROOT_DIR)/src/runtime/hexagon_remote/bin/host
TEST_LD_FLAGS += -Wl,--rpath=$(HL_HEXAGON_TOOLS)/lib/iss
endif
endif
endif

.PHONY: all
all: distrib test_internal

# Depending on which linker we're using,
# we need a different invocation to get the
# linker map file.
ifeq ($(UNAME), Darwin)
    MAP_FLAGS= -Wl,-map -Wl,$(BUILD_DIR)/llvm_objects/list.all
else
    MAP_FLAGS= -Wl,-Map=$(BUILD_DIR)/llvm_objects/list.all
endif

$(BUILD_DIR)/llvm_objects/list: $(OBJECTS) $(INITIAL_MODULES)
	# Determine the relevant object files from llvm with a dummy
	# compilation. Passing -map to the linker gets it to list, as
	# part of the linker map file, the object files in which archives it uses to
	# resolve symbols. We only care about the libLLVM ones, which we will filter below.
	@mkdir -p $(@D)
	$(CXX) -o /dev/null -shared $(MAP_FLAGS) $(OBJECTS) $(INITIAL_MODULES) $(LLVM_STATIC_LIBS) $(LLVM_SYSTEM_LIBS) $(COMMON_LD_FLAGS) > /dev/null
	# if the list has changed since the previous build, or there
	# is no list from a previous build, then delete any old object
	# files and re-extract the required object files
	cd $(BUILD_DIR)/llvm_objects; \
	cat list.all | LANG=C sed -n 's/^[^\/]*\(\/[^ ()]*libLLVM.*[.]a\)[^a-zA-Z]*\([^ ()]*[.]o\).*$$/\1 \2/p' | sort | uniq > list.new; \
	rm list.all; \
	if cmp -s list.new list; \
	then \
	echo "No changes in LLVM deps"; \
	touch list; \
	else \
	rm -f llvm_*.o*; \
	cat list.new | sed = | sed "N;s/\n /\n/;s/\([0-9]*\)\n\([^ ]*\) \([^ ]*\)/ar x \2 \3; mv \3 llvm_\1_\3/" | bash - ; \
	mv list.new list; \
	fi

$(LIB_DIR)/libHalide.a: $(OBJECTS) $(INITIAL_MODULES) $(BUILD_DIR)/llvm_objects/list
	# Archive together all the halide and llvm object files
	@mkdir -p $(@D)
	@rm -f $@
	ar q $@ $(OBJECTS) $(INITIAL_MODULES) $(BUILD_DIR)/llvm_objects/llvm_*.o*
	ranlib $@

ifeq ($(UNAME), Linux)
LIBHALIDE_SONAME_FLAGS=-Wl,-soname,libHalide.so
else
LIBHALIDE_SONAME_FLAGS=
endif

# Shared libHalide; on Darwin its install name is set to the absolute build path.
$(BIN_DIR)/libHalide.$(SHARED_EXT): $(OBJECTS) $(INITIAL_MODULES)
	@mkdir -p $(@D)
	$(CXX) -shared $(OBJECTS) $(INITIAL_MODULES) $(LLVM_LIBS_FOR_SHARED_LIBHALIDE) $(LLVM_SYSTEM_LIBS) $(COMMON_LD_FLAGS) $(INSTALL_NAME_TOOL_LD_FLAGS) $(LIBHALIDE_SONAME_FLAGS) -o $@
ifeq ($(UNAME), Darwin)
	install_name_tool -id $(CURDIR)/$@ $@
endif

# Halide.h is the licence text plus all public headers run through build_halide_h.
$(INCLUDE_DIR)/Halide.h: $(SRC_DIR)/../LICENSE.txt $(HEADERS) $(BIN_DIR)/build_halide_h
	@mkdir -p $(@D)
	$(BIN_DIR)/build_halide_h $(SRC_DIR)/../LICENSE.txt $(HEADERS) > $@
	# Also generate a precompiled version in the same folder so that anything compiled with a compatible set of flags can use it
	@mkdir -p $@.gch
	$(CXX) -std=c++11 $(TEST_CXX_FLAGS) -I$(ROOT_DIR) $(OPTIMIZE) -x c++-header $@ -o $@.gch/Halide.default.gch
	$(CXX) -std=c++11 $(TEST_CXX_FLAGS) -I$(ROOT_DIR) $(OPTIMIZE_FOR_BUILD_TIME) -x c++-header $@ -o $@.gch/Halide.test.gch

# Copy the public runtime headers from src/runtime into $(INCLUDE_DIR).
$(INCLUDE_DIR)/HalideRuntime%: $(SRC_DIR)/runtime/HalideRuntime%
	echo Copying $<
	@mkdir -p $(@D)
	cp $< $(INCLUDE_DIR)/

$(INCLUDE_DIR)/HalideBuffer.h $(INCLUDE_DIR)/HalidePyTorchHelpers.h $(INCLUDE_DIR)/HalidePyTorchCudaHelpers.h: $(INCLUDE_DIR)/%: $(SRC_DIR)/runtime/%
	echo Copying $<
	@mkdir -p $(@D)
	cp $< $(INCLUDE_DIR)/

$(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp
	@-mkdir -p $(@D)
	$(CXX) -std=c++11 $< -o $@

# Compiler-generated dependency files; absent on a first build.
-include $(OBJECTS:.o=.d)
-include $(INITIAL_MODULES:.o=.d)

# Compile generic 32- or 64-bit code
# (The 'nacl' is a red herring. This is just a generic 32-bit little-endian target.)
RUNTIME_TRIPLE_32 = "le32-unknown-nacl-unknown"
RUNTIME_TRIPLE_64 = "le64-unknown-unknown-unknown"

# windows-specific modules use the __stdcall calling convention
RUNTIME_TRIPLE_WIN_32 = "i386-unknown-unknown-unknown"
RUNTIME_TRIPLE_WIN_64 = "x86_64-unknown-windows-unknown"

# -std=gnu++98 is deliberate; we do NOT want c++11 here,
# as we don't want static locals to get thread synchronization stuff.
RUNTIME_CXX_FLAGS = -O3 -fno-vectorize -ffreestanding -fno-blocks -fno-exceptions -fno-unwind-tables -std=gnu++98

# Runtime modules are compiled by clang to LLVM IR (.ll), one rule per
# combination of {windows-specific, generic} x {32, 64 bit} x {release, debug}.
# Each rule writes its dependency file next to its output, named after the
# target ($(@:.ll=.d)), so no two rules can write the same .d file.
# NOTE(review): -fpic is passed by the generic release rules and by the
# windows 32-bit release rule only; confirm whether that asymmetry is intended.

$(BUILD_DIR)/initmod.windows_%_32.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m32 -target $(RUNTIME_TRIPLE_WIN_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.%_32.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# 64-bit Windows debug module: must be built as 64-bit code (-m64, BITS_64),
# matching the 64-bit Windows triple and the release rule above.
$(BUILD_DIR)/initmod.windows_%_64_debug.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.%_64_debug.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.windows_%_32_debug.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WIN_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

$(BUILD_DIR)/initmod.%_32_debug.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME -O3 $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# Hand-written IR modules are copied into the build directory unchanged.
$(BUILD_DIR)/initmod.%_ll.ll: $(SRC_DIR)/runtime/%.ll
	@mkdir -p $(@D)
	cp $< $@

# Assemble IR to bitcode.
$(BUILD_DIR)/initmod.%.bc: $(BUILD_DIR)/initmod.%.ll $(BUILD_DIR)/llvm_ok
	$(LLVM_AS) $< -o $@

# Wrap the bitcode in a C++ source file; binary2cpp reads stdin, writes stdout
# and takes the name of the symbol to emit as its argument.
$(BUILD_DIR)/initmod.%.cpp: $(BIN_DIR)/binary2cpp $(BUILD_DIR)/initmod.%.bc
	./$(BIN_DIR)/binary2cpp halide_internal_initmod_$* < $(BUILD_DIR)/initmod.$*.bc > $@

# Same wrapping for the text of the exported runtime headers.
$(BUILD_DIR)/initmod.%_h.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/%.h
	./$(BIN_DIR)/binary2cpp halide_internal_runtime_header_$*_h < $(SRC_DIR)/runtime/$*.h > $@

# Any C code in the runtime that must be inlined needs to be copy-pasted into the output for the C backend.
$(BUILD_DIR)/initmod.inlined_c.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/halide_buffer_t.cpp
	./$(BIN_DIR)/binary2cpp halide_internal_initmod_inlined_c < $(SRC_DIR)/runtime/halide_buffer_t.cpp > $@

# Embed NVIDIA libdevice bitcode; $(basename $*) drops the last dotted suffix
# of the stem when forming the C++ symbol name.
$(BUILD_DIR)/initmod_ptx.%_ll.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/nvidia_libdevice_bitcode/libdevice.%.bc
	./$(BIN_DIR)/binary2cpp halide_internal_initmod_ptx_$(basename $*)_ll < $(SRC_DIR)/runtime/nvidia_libdevice_bitcode/libdevice.$*.bc > $@

# Host tool that turns stdin into a C++ byte-array definition on stdout.
$(BIN_DIR)/binary2cpp: $(ROOT_DIR)/tools/binary2cpp.cpp
	@mkdir -p $(@D)
	$(CXX) $< -o $@

# Objects for the embedded runtime modules. The dependency file is written
# next to the object and names the object itself as its target; deriving both
# from $@ keeps the "initmod." / "initmod_ptx." prefix that the pattern stem
# ($*) does not contain.
$(BUILD_DIR)/initmod_ptx.%_ll.o: $(BUILD_DIR)/initmod_ptx.%_ll.cpp
	$(CXX) -c $< -o $@ -MMD -MP -MF $(@:.o=.d) -MT $@

$(BUILD_DIR)/initmod.%.o: $(BUILD_DIR)/initmod.%.cpp
	$(CXX) -c $< -o $@ -MMD -MP -MF $(@:.o=.d) -MT $@

# Regular libHalide sources: every .cpp is expected to have a matching .h.
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(SRC_DIR)/%.h $(BUILD_DIR)/llvm_ok
	@mkdir -p $(@D)
	$(CXX) $(CXX_FLAGS) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/$*.d -MT $(BUILD_DIR)/$*.o

# Simplify_*.cpp share a single header instead of having one each.
$(BUILD_DIR)/Simplify_%.o: $(SRC_DIR)/Simplify_%.cpp $(SRC_DIR)/Simplify_Internal.h $(BUILD_DIR)/llvm_ok
	@mkdir -p $(@D)
	$(CXX) $(CXX_FLAGS) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/Simplify_$*.d -MT $@

.PHONY: clean
clean:
	rm -rf $(LIB_DIR)
	rm -rf $(BIN_DIR)
	rm -rf $(BUILD_DIR)
	rm -rf $(TMP_DIR)
	rm -rf $(FILTERS_DIR)
	rm -rf $(INCLUDE_DIR)
	rm -rf $(SHARE_DIR)
	rm -rf $(DISTRIB_DIR)
	rm -rf $(ROOT_DIR)/apps/*/bin

# With no prerequisites, .SECONDARY makes every target secondary, i.e. make
# never auto-deletes intermediate files.
.SECONDARY:

# Test source discovery (one entry per source file).
CORRECTNESS_TESTS = $(shell ls $(ROOT_DIR)/test/correctness/*.cpp) $(shell ls $(ROOT_DIR)/test/correctness/*.c)
PERFORMANCE_TESTS = $(shell ls $(ROOT_DIR)/test/performance/*.cpp)
ERROR_TESTS = $(shell ls $(ROOT_DIR)/test/error/*.cpp)
WARNING_TESTS = $(shell ls $(ROOT_DIR)/test/warning/*.cpp)
OPENGL_TESTS := $(shell ls $(ROOT_DIR)/test/opengl/*.cpp)
GENERATOR_EXTERNAL_TESTS := $(shell ls $(ROOT_DIR)/test/generator/*test.cpp)
GENERATOR_EXTERNAL_TEST_GENERATOR := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp)
TUTORIALS = $(filter-out %_generate.cpp, $(shell ls $(ROOT_DIR)/tutorial/*.cpp))
AUTO_SCHEDULE_TESTS = $(shell ls $(ROOT_DIR)/test/auto_schedule/*.cpp)

-include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d)

# Aggregate targets: each maps the discovered sources onto per-test targets.
test_correctness: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=quiet_correctness_%) $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.c=quiet_correctness_%)
test_performance: $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=performance_%)
test_error: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%)
test_warning: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%)
test_tutorial: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%)
test_valgrind: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=valgrind_%)
test_avx512: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=avx512_%)
test_opengl: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=opengl_%)
test_auto_schedule: $(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=auto_schedule_%)

.PHONY: test_correctness_multi_gpu
test_correctness_multi_gpu: correctness_gpu_multi_device

# There are 3 types of tests for generators:
# 1) Externally-written aot-based tests
# 2) Externally-written aot-based tests (compiled using C++ backend)
# 3) Externally-written JIT-based tests
GENERATOR_AOT_TESTS = $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=generator_aot_%)
GENERATOR_AOTCPP_TESTS = $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=generator_aotcpp_%)
GENERATOR_JIT_TESTS = $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=generator_jit_%)

# multitarget test doesn't make any sense for the CPP backend; just skip it.
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_multitarget,$(GENERATOR_AOTCPP_TESTS))

# Note that many of the AOT-CPP tests are broken right now;
# remove AOT-CPP tests that don't (yet) work for C++ backend
# (each group is tagged with the *known* blocking issue).

# https://github.com/halide/Halide/issues/2084 (only if opencl enabled)
GENERATOR_AOTCPP_TESTS := $(filter-out \
    generator_aotcpp_acquire_release \
    generator_aotcpp_define_extern_opencl \
    generator_aotcpp_gpu_object_lifetime \
    generator_aotcpp_gpu_only \
    generator_aotcpp_cleanup_on_error \
    generator_aotcpp_buffer_copy,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2071
GENERATOR_AOTCPP_TESTS := $(filter-out \
    generator_aotcpp_user_context \
    generator_aotcpp_argvcall \
    generator_aotcpp_metadata_tester \
    generator_aotcpp_cxx_mangling,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2075
GENERATOR_AOTCPP_TESTS := $(filter-out \
    generator_aotcpp_msan \
    generator_aotcpp_memory_profiler_mandelbrot,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2082
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_matlab,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/2093
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_async_parallel,$(GENERATOR_AOTCPP_TESTS))

# https://github.com/halide/Halide/issues/4916
GENERATOR_AOTCPP_TESTS := $(filter-out \
    generator_aotcpp_stubtest \
    generator_aotcpp_stubuser,$(GENERATOR_AOTCPP_TESTS))

test_aotcpp_generator: $(GENERATOR_AOTCPP_TESTS)

# This is just a test to ensure that RunGen builds and links for a critical mass of Generators;
# not all will work directly (e.g. due to missing define_externs at link time), so we disable
# those known to be broken for plausible reasons.
GENERATOR_BUILD_RUNGEN_TESTS = $(GENERATOR_EXTERNAL_TEST_GENERATOR:$(ROOT_DIR)/test/generator/%_generator.cpp=$(FILTERS_DIR)/%.rungen)

# Generators whose RunGen wrapper is known not to build/link.
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out \
    $(FILTERS_DIR)/async_parallel.rungen \
    $(FILTERS_DIR)/cxx_mangling_define_extern.rungen \
    $(FILTERS_DIR)/define_extern_opencl.rungen \
    $(FILTERS_DIR)/matlab.rungen \
    $(FILTERS_DIR)/msan.rungen \
    $(FILTERS_DIR)/multitarget.rungen \
    $(FILTERS_DIR)/nested_externs.rungen \
    $(FILTERS_DIR)/tiled_blur.rungen \
    $(FILTERS_DIR)/extern_output.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))

# Extra RunGen-related binaries that are not derived from a *_generator.cpp.
GENERATOR_BUILD_RUNGEN_TESTS += \
    $(FILTERS_DIR)/multi_rungen \
    $(FILTERS_DIR)/multi_rungen2 \
    $(FILTERS_DIR)/rungen_test \
    $(FILTERS_DIR)/registration_test

# Build everything above, then actually run the two self-checking binaries.
test_rungen: $(GENERATOR_BUILD_RUNGEN_TESTS)
	$(FILTERS_DIR)/rungen_test
	$(FILTERS_DIR)/registration_test

test_generator: $(GENERATOR_AOT_TESTS) $(GENERATOR_AOTCPP_TESTS) $(GENERATOR_JIT_TESTS) $(GENERATOR_BUILD_RUNGEN_TESTS)
	$(FILTERS_DIR)/rungen_test
	$(FILTERS_DIR)/registration_test

ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning test_generator

# These targets perform timings of each test. For most tests this includes Halide JIT compile times, and run times.
# For generator tests they time the compile time only. The times are recorded in CSV files.
# NOTE(review): GENERATOR_TESTS is not defined in the visible part of this
# file; if it is undefined, time_compilation_generator only resets its CSV.
time_compilation_correctness: init_time_compilation_correctness $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=time_compilation_test_%)
time_compilation_performance: init_time_compilation_performance $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=time_compilation_performance_%)
time_compilation_opengl: init_time_compilation_opengl $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=time_compilation_opengl_%)
time_compilation_generator: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%)

# (Re)create compile_times_<kind>.csv containing only the header row.
init_time_compilation_%:
	echo "TEST,User (s),System (s),Real" > compile_times_$*.csv

# Deliberately deferred (?=): $@ is expanded in the recipe that uses this, so
# each CSV row is labelled with the target being timed.
TIME_COMPILATION ?= /usr/bin/time -a -f "$@,%U,%S,%E" -o

# Performance and auto-schedule tests run in a separate make invocation after
# all other tests have finished.
run_tests: $(ALL_TESTS)
	make -f $(THIS_MAKEFILE) test_performance test_auto_schedule

# Build (but do not run) every test binary.
.PHONY: build_tests
build_tests: \
    $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=$(BIN_DIR)/correctness_%) \
    $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=$(BIN_DIR)/performance_%) \
    $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=$(BIN_DIR)/error_%) \
    $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=$(BIN_DIR)/warning_%) \
    $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=$(BIN_DIR)/$(TARGET)/generator_aot_%) \
    $(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=$(BIN_DIR)/generator_jit_%) \
    $(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=$(BIN_DIR)/auto_schedule_%)

# OpenGL doesn't build on every host platform we support (eg. ARM).
.PHONY: build_opengl_tests
build_opengl_tests: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BIN_DIR)/opengl_%)

# Only fold the OpenGL tests into build_tests when WITH_OPENGL is non-empty.
ifneq ($(WITH_OPENGL),)
build_tests: build_opengl_tests
endif

# Remove generator binaries, generated filters and standalone runtimes only.
clean_generator:
	rm -rf $(BIN_DIR)/*.generator
	rm -rf $(BIN_DIR)/*/runtime.a
	rm -rf $(FILTERS_DIR)
	rm -rf $(BIN_DIR)/*/generator_*
	rm -rf $(BUILD_DIR)/*_generator.o
	rm -f $(BUILD_DIR)/GenGen.o
	rm -f $(BUILD_DIR)/RunGenMain.o

time_compilation_tests: time_compilation_correctness time_compilation_performance time_compilation_generator

# GenGen.o supplies main() for every generator executable.
$(BUILD_DIR)/GenGen.o: $(ROOT_DIR)/tools/GenGen.cpp $(INCLUDE_DIR)/Halide.h
	@mkdir -p $(@D)
	$(CXX) -c $< $(TEST_CXX_FLAGS) -I$(INCLUDE_DIR) -o $@

# Make an empty generator for generating runtimes.
$(BIN_DIR)/runtime.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT)
	@mkdir -p $(@D)
	$(CXX) $< $(TEST_LD_FLAGS) -o $@

# Generate a standalone runtime for a given target string
# (the directory component of the output path is the target string).
$(BIN_DIR)/%/runtime.a: $(BIN_DIR)/runtime.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -r runtime -o $(CURDIR)/$(@D) target=$*

$(BIN_DIR)/test_internal: $(ROOT_DIR)/test/internal.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) $< -I$(SRC_DIR) $(TEST_LD_FLAGS) -o $@

# Correctness test that link against libHalide
$(BIN_DIR)/correctness_%: \
    $(ROOT_DIR)/test/correctness/%.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# Correctness tests that do NOT link against libHalide
# (compiled as plain C via "-x c").
$(BIN_DIR)/correctness_plain_c_includes: $(ROOT_DIR)/test/correctness/plain_c_includes.c $(RUNTIME_EXPORTED_INCLUDES)
	$(CXX) -x c -Wall -Werror -I$(ROOT_DIR)/src/runtime $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(ROOT_DIR)/src/runtime -o $@

# Note that this test must *not* link in either libHalide, or a Halide runtime;
# this test should be usable without either.
$(BIN_DIR)/correctness_halide_buffer: \
    $(ROOT_DIR)/test/correctness/halide_buffer.cpp \
    $(INCLUDE_DIR)/HalideBuffer.h \
    $(RUNTIME_EXPORTED_INCLUDES)
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -o $@

# The image_io test additionally needs to link to libpng and
# libjpeg.
$(BIN_DIR)/correctness_image_io: \
    $(ROOT_DIR)/test/correctness/image_io.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(RUNTIME_EXPORTED_INCLUDES)
	$(CXX) $(TEST_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# OpenCL runtime correctness test requires runtime.a to be linked.
$(BIN_DIR)/$(TARGET)/correctness_opencl_runtime: \
    $(ROOT_DIR)/test/correctness/opencl_runtime.cpp \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(BIN_DIR)/$(TARGET)/runtime.a $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# Performance tests are built with $(OPTIMIZE) rather than the
# build-time-friendly flags used by the other test kinds.
$(BIN_DIR)/performance_%: $(ROOT_DIR)/test/performance/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE) $< -I$(INCLUDE_DIR) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(TEST_LD_FLAGS) -o $@

# Error tests that link against libHalide
$(BIN_DIR)/error_%: $(ROOT_DIR)/test/error/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

$(BIN_DIR)/warning_%: $(ROOT_DIR)/test/warning/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# OpenGL tests also emit a dependency file, which is -include'd where
# OPENGL_TESTS is defined.
$(BIN_DIR)/opengl_%: \
    $(ROOT_DIR)/test/opengl/%.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(INCLUDE_DIR)/HalideRuntime.h \
    $(INCLUDE_DIR)/HalideRuntimeOpenGL.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -I$(SRC_DIR) $(TEST_LD_FLAGS) $(OPENGL_LD_FLAGS) -o $@ -MMD -MF $(BUILD_DIR)/test_opengl_$*.d

# Auto schedule tests that link against libHalide
$(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# TODO(srj): this doesn't auto-delete, why not?
# NOTE(review): two likely reasons. .INTERMEDIATE takes literal target names,
# so the '%' here is not treated as a pattern; and this Makefile declares a
# bare ".SECONDARY:", which tells make never to auto-delete intermediates.
.INTERMEDIATE: $(BIN_DIR)/%.generator

# By default, %.generator is produced by building %_generator.cpp
# Note that the rule includes all _generator.cpp files, so that generator with define_extern
# usage can just add deps later.
$(BUILD_DIR)/%_generator.o: $(ROOT_DIR)/test/generator/%_generator.cpp $(INCLUDE_DIR)/Halide.h
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) -I$(INCLUDE_DIR) -I$(CURDIR)/$(FILTERS_DIR) -c $< -o $@

# Only compilable/linkable prerequisites are passed to the linker; libHalide
# itself is expected to come in through $(TEST_LD_FLAGS).
$(BIN_DIR)/%.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT) $(BUILD_DIR)/%_generator.o
	@mkdir -p $(@D)
	$(CXX) $(filter %.cpp %.o %.a,$^) $(TEST_LD_FLAGS) -o $@

# It is not always possible to cross compile between 32-bit and 64-bit via the clang build as part of llvm
# These next two rules can fail the compilation and produce zero length bitcode blobs.
# If the zero length blob is actually used, the test will fail anyway, but usually only the bitness
# of the target is used.
$(BUILD_DIR)/external_code_extern_bitcode_32.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m32 -target $(RUNTIME_TRIPLE_32) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_32.bc \
		|| echo -n > $(BUILD_DIR)/external_code_extern_32.bc
	./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_32 < $(BUILD_DIR)/external_code_extern_32.bc > $@

$(BUILD_DIR)/external_code_extern_bitcode_64.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m64 -target $(RUNTIME_TRIPLE_64) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_64.bc \
		|| echo -n > $(BUILD_DIR)/external_code_extern_64.bc
	./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_64 < $(BUILD_DIR)/external_code_extern_64.bc > $@

# The same extern source, embedded verbatim as C++ text.
$(BUILD_DIR)/external_code_extern_cpp_source.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	./$(BIN_DIR)/binary2cpp external_code_extern_cpp_source < $< > $@

# The external_code generator links in all three embedded blobs.
$(BIN_DIR)/external_code.generator: \
    $(BUILD_DIR)/GenGen.o \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(BUILD_DIR)/external_code_generator.o \
    $(BUILD_DIR)/external_code_extern_bitcode_32.cpp \
    $(BUILD_DIR)/external_code_extern_bitcode_64.cpp \
    $(BUILD_DIR)/external_code_extern_cpp_source.cpp
	@mkdir -p $(@D)
	$(CXX) $(filter %.cpp %.o %.a,$^) $(TEST_LD_FLAGS) -o $@

NAME_MANGLING_TARGET = $(NON_EMPTY_TARGET)-c_plus_plus_name_mangling

# Output kinds requested from every AOT generator invocation.
GEN_AOT_OUTPUTS = -e static_library,c_header,c_source,registration

# By default, %.a/.h are produced by executing %.generator. Runtimes are not included in these.
# (We explicitly also generate .cpp output here as well, as additional test surface for the C++ backend.)
$(FILTERS_DIR)/%.a: $(BIN_DIR)/%.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g $* $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime

# The header, C++ source and registration source are side products of the .a
# rule; these rules only record that dependency.
$(FILTERS_DIR)/%.h: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

$(FILTERS_DIR)/%.halide_generated.cpp: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

$(FILTERS_DIR)/%.registration.cpp: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

# Stub headers come from a separate generator invocation (-e cpp_stub).
$(FILTERS_DIR)/%.stub.h: $(BIN_DIR)/%.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g $* -n $* -o $(CURDIR)/$(FILTERS_DIR) -e cpp_stub

$(FILTERS_DIR)/cxx_mangling_externs.o: $(ROOT_DIR)/test/generator/cxx_mangling_externs.cpp
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) -c $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) -o $@

# If we want to use a Generator with custom GeneratorParams, we need to write
# custom rules: to pass the GeneratorParams, and to give a unique function and file name.
$(FILTERS_DIR)/cxx_mangling.a: $(BIN_DIR)/cxx_mangling.generator $(FILTERS_DIR)/cxx_mangling_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling \
		-f "HalideTest::AnotherNamespace::cxx_mangling"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_externs.o

ifneq ($(TEST_CUDA), )
# Also build with a gpu target to ensure that the GPU-Host generation
# code handles name mangling properly. (Note that we don't need to
# run this code, just check for link errors.)
$(FILTERS_DIR)/cxx_mangling_gpu.a: $(BIN_DIR)/cxx_mangling.generator $(FILTERS_DIR)/cxx_mangling_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling-cuda-cuda_capability_30 \
		-f "HalideTest::cxx_mangling_gpu"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_externs.o
endif

# The header prerequisite is dropped from the compile line by filter-out.
$(FILTERS_DIR)/cxx_mangling_define_extern_externs.o: \
    $(ROOT_DIR)/test/generator/cxx_mangling_define_extern_externs.cpp \
    $(FILTERS_DIR)/cxx_mangling.h
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) -c $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) -o $@

$(FILTERS_DIR)/cxx_mangling_define_extern.a: \
    $(BIN_DIR)/cxx_mangling_define_extern.generator \
    $(FILTERS_DIR)/cxx_mangling_define_extern_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling_define_extern $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling-user_context \
		-f "HalideTest::cxx_mangling_define_extern"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_define_extern_externs.o

# pyramid needs a custom arg.
$(FILTERS_DIR)/pyramid.a: $(BIN_DIR)/pyramid.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g pyramid -f pyramid $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime levels=10

# string_param is given an RPN expression as a GeneratorParam.
$(FILTERS_DIR)/string_param.a: $(BIN_DIR)/string_param.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g string_param -f string_param $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime rpn_expr="5 y * x +"

# memory_profiler_mandelbrot need profiler set
$(FILTERS_DIR)/memory_profiler_mandelbrot.a: $(BIN_DIR)/memory_profiler_mandelbrot.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g memory_profiler_mandelbrot -f memory_profiler_mandelbrot $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-profile

# A second generator registered inside alias_generator.cpp.
$(FILTERS_DIR)/alias_with_offset_42.a: $(BIN_DIR)/alias.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g alias_with_offset_42 -f alias_with_offset_42 $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime

# GeneratorParams passed to both metadata_tester builds.
METADATA_TESTER_GENERATOR_ARGS = \
    input.type=uint8 input.dim=3 \
    dim_only_input_buffer.type=uint8 \
    untyped_input_buffer.type=uint8 untyped_input_buffer.dim=3 \
    output.type=float32,float32 output.dim=3 \
    input_not_nod.type=uint8 input_not_nod.dim=3 \
    input_nod.dim=3 \
    input_not.type=uint8 \
    array_input.size=2 \
    array_i8.size=2 array_i16.size=2 array_i32.size=2 \
    array_h.size=2 \
    buffer_array_input2.dim=3 \
    buffer_array_input3.type=float32 \
    buffer_array_input4.dim=3 buffer_array_input4.type=float32 \
    buffer_array_input5.size=2 \
    buffer_array_input6.size=2 buffer_array_input6.dim=3 \
    buffer_array_input7.size=2 buffer_array_input7.type=float32 \
    buffer_array_input8.size=2 buffer_array_input8.dim=3 buffer_array_input8.type=float32 \
    buffer_f16_untyped.type=float16 \
    array_outputs.size=2 \
    array_outputs7.size=2 array_outputs8.size=2 array_outputs9.size=2

# metadata_tester is built with and without user-context
$(FILTERS_DIR)/metadata_tester.a: $(BIN_DIR)/metadata_tester.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g metadata_tester -f metadata_tester $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime $(METADATA_TESTER_GENERATOR_ARGS)

$(FILTERS_DIR)/metadata_tester_ucon.a: $(BIN_DIR)/metadata_tester.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g metadata_tester -f metadata_tester_ucon $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-user_context-no_runtime $(METADATA_TESTER_GENERATOR_ARGS)

$(BIN_DIR)/$(TARGET)/generator_aot_metadata_tester: $(FILTERS_DIR)/metadata_tester_ucon.a

# multitarget is compiled for two target variants at once and requests extra
# output kinds on top of $(GEN_AOT_OUTPUTS).
$(FILTERS_DIR)/multitarget.a: $(BIN_DIR)/multitarget.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g multitarget -f "HalideTest::multitarget" $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_bounds_query-no_runtime-c_plus_plus_name_mangling,$(TARGET)-no_runtime-c_plus_plus_name_mangling \
		-e assembly,bitcode,c_source,c_header,stmt_html,static_library,stmt

# msan is generated with the msan feature and without "no_runtime".
$(FILTERS_DIR)/msan.a: $(BIN_DIR)/msan.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g msan -f msan $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-msan

# user_context needs to be generated with user_context as the first argument to its calls
$(FILTERS_DIR)/user_context.a: $(BIN_DIR)/user_context.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g user_context $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-user_context

# ditto for user_context_insanity
$(FILTERS_DIR)/user_context_insanity.a: $(BIN_DIR)/user_context_insanity.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g user_context_insanity $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-user_context

# matlab needs to be generated with matlab in TARGET
$(FILTERS_DIR)/matlab.a: $(BIN_DIR)/matlab.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g matlab $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-matlab

# Some .generators have additional dependencies (usually due to define_extern usage).
# These typically require two extra dependencies:
# (1) Ensuring the extra _generator.cpp is built into the .generator.
# (2) Ensuring the extra .a is linked into the final output.

# TODO(srj): we really want to say "anything that depends on tiled_blur.a also depends on blur2x2.a";
# is there a way to specify that in Make?
$(BIN_DIR)/$(TARGET)/generator_aot_tiled_blur: $(FILTERS_DIR)/blur2x2.a
ifneq ($(TEST_CUDA), )
$(BIN_DIR)/$(TARGET)/generator_aot_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_gpu.a
endif
$(BIN_DIR)/$(TARGET)/generator_aot_cxx_mangling_define_extern: $(FILTERS_DIR)/cxx_mangling.a

# Same extra dependencies for the C++-backend flavour of those tests.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_tiled_blur: $(FILTERS_DIR)/blur2x2.halide_generated.cpp
ifneq ($(TEST_CUDA), )
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_gpu.halide_generated.cpp
endif
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_externs.o
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling_define_extern: \
    $(FILTERS_DIR)/cxx_mangling.halide_generated.cpp \
    $(FILTERS_DIR)/cxx_mangling_externs.o \
    $(FILTERS_DIR)/cxx_mangling_define_extern_externs.o

# stubuser is built against the stub headers of stubtest and configure, and
# links their generator objects.
$(BUILD_DIR)/stubuser_generator.o: $(FILTERS_DIR)/stubtest.stub.h $(FILTERS_DIR)/configure.stub.h
$(BIN_DIR)/stubuser.generator: $(BUILD_DIR)/stubtest_generator.o $(BUILD_DIR)/configure_generator.o

# stubtest has input and output funcs with undefined types and array sizes; this is fine for stub
# usage (the types can be inferred), but for AOT compilation, we must make the types
# concrete via generator args.
STUBTEST_GENERATOR_ARGS = \
    untyped_buffer_input.type=uint8 untyped_buffer_input.dim=3 \
    simple_input.type=float32 \
    array_input.type=float32 array_input.size=2 \
    int_arg.size=2 \
    tuple_output.type=float32,float32 \
    vectorize=true

$(FILTERS_DIR)/stubtest.a: $(BIN_DIR)/stubtest.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g stubtest -f stubtest $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime $(STUBTEST_GENERATOR_ARGS)

# external_code is generated twice: the static library embeds the extern as
# bitcode, while the C++ source output is produced with
# external_code_is_bitcode=false.
$(FILTERS_DIR)/external_code.a: $(BIN_DIR)/external_code.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g external_code -e static_library,c_header,registration -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime external_code_is_bitcode=true

$(FILTERS_DIR)/external_code.halide_generated.cpp: $(BIN_DIR)/external_code.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g external_code -e c_source -o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime external_code_is_bitcode=false

# Second build of the autograd generator, emitted as autograd_grad with
# "-d 1" and auto_schedule=true.
$(FILTERS_DIR)/autograd_grad.a: $(BIN_DIR)/autograd.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g autograd $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) -f autograd_grad -d 1 \
		target=$(TARGET)-no_runtime auto_schedule=true

# Usually, it's considered best practice to have one Generator per
# .cpp file, with the generator-name and filename matching;
# nested_externs_generators.cpp is a counterexample, and thus requires
# some special casing to get right. First, make a special rule to
# build each of the Generators in nested_externs_generator.cpp (which
# all have the form nested_externs_*).
$(FILTERS_DIR)/nested_externs_%.a: $(BIN_DIR)/nested_externs.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g nested_externs_$* $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime

# Compile / include / link settings shared by all AOT generator tests.
GEN_AOT_CXX_FLAGS = $(TEST_CXX_FLAGS) -Wno-unknown-pragmas
GEN_AOT_INCLUDES = \
    -I$(INCLUDE_DIR) \
    -I$(FILTERS_DIR) \
    -I$(ROOT_DIR)/src/runtime \
    -I$(ROOT_DIR)/test/common \
    -I $(ROOT_DIR)/apps/support \
    -I $(SRC_DIR)/runtime \
    -I$(ROOT_DIR)/tools
GEN_AOT_LD_FLAGS = $(COMMON_LD_FLAGS)

ifneq ($(TEST_METAL), )
# Unlike cuda and opencl, which dynamically go find the appropriate symbols, metal requires actual linking.
GEN_AOT_LD_FLAGS += $(METAL_LD_FLAGS)
endif

# By default, %_aottest.cpp depends on $(FILTERS_DIR)/%.a/.h (but not libHalide).
# Headers are prerequisites only; just the .cpp/.o/.a inputs reach the compiler.
$(BIN_DIR)/$(TARGET)/generator_aot_%: \
    $(ROOT_DIR)/test/generator/%_aottest.cpp \
    $(FILTERS_DIR)/%.a \
    $(FILTERS_DIR)/%.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# Also make AOT testing targets that depends on the .cpp output (rather than .a).
# Generic rule: build the AOT test against the C++ backend output of the filter.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_%: \
  $(ROOT_DIR)/test/generator/%_aottest.cpp \
  $(FILTERS_DIR)/%.halide_generated.cpp \
  $(FILTERS_DIR)/%.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# MSAN test doesn't use the standard runtime
# (so runtime.a is not a prerequisite, and everything except headers is linked).
$(BIN_DIR)/$(TARGET)/generator_aot_msan: \
  $(ROOT_DIR)/test/generator/msan_aottest.cpp \
  $(FILTERS_DIR)/msan.a \
  $(FILTERS_DIR)/msan.h \
  $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter-out %.h,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# alias has additional deps to link in
$(BIN_DIR)/$(TARGET)/generator_aot_alias: \
  $(ROOT_DIR)/test/generator/alias_aottest.cpp \
  $(FILTERS_DIR)/alias.a \
  $(FILTERS_DIR)/alias_with_offset_42.a \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

$(BIN_DIR)/$(TARGET)/generator_aotcpp_alias: \
  $(ROOT_DIR)/test/generator/alias_aottest.cpp \
  $(FILTERS_DIR)/alias.halide_generated.cpp \
  $(FILTERS_DIR)/alias_with_offset_42.halide_generated.cpp \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# autograd has additional deps to link in
$(BIN_DIR)/$(TARGET)/generator_aot_autograd: \
  $(ROOT_DIR)/test/generator/autograd_aottest.cpp \
  $(FILTERS_DIR)/autograd.a \
  $(FILTERS_DIR)/autograd_grad.a \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

$(BIN_DIR)/$(TARGET)/generator_aotcpp_autograd: \
  $(ROOT_DIR)/test/generator/autograd_aottest.cpp \
  $(FILTERS_DIR)/autograd.halide_generated.cpp \
  $(FILTERS_DIR)/autograd_grad.halide_generated.cpp \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# nested_externs has additional deps to link in
$(BIN_DIR)/$(TARGET)/generator_aot_nested_externs: \
  $(ROOT_DIR)/test/generator/nested_externs_aottest.cpp \
  $(FILTERS_DIR)/nested_externs_root.a \
  $(FILTERS_DIR)/nested_externs_inner.a \
  $(FILTERS_DIR)/nested_externs_combine.a \
  $(FILTERS_DIR)/nested_externs_leaf.a \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

$(BIN_DIR)/$(TARGET)/generator_aotcpp_nested_externs: \
  $(ROOT_DIR)/test/generator/nested_externs_aottest.cpp \
  $(FILTERS_DIR)/nested_externs_root.halide_generated.cpp \
  $(FILTERS_DIR)/nested_externs_inner.halide_generated.cpp \
  $(FILTERS_DIR)/nested_externs_combine.halide_generated.cpp \
  $(FILTERS_DIR)/nested_externs_leaf.halide_generated.cpp \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# The matlab tests need "-matlab" in the runtime
$(BIN_DIR)/$(TARGET)/generator_aot_matlab: \
  $(ROOT_DIR)/test/generator/matlab_aottest.cpp \
  $(FILTERS_DIR)/matlab.a \
  $(FILTERS_DIR)/matlab.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)-matlab/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@

$(BIN_DIR)/$(TARGET)/generator_aotcpp_matlab: \
  $(ROOT_DIR)/test/generator/matlab_aottest.cpp \
  $(FILTERS_DIR)/matlab.halide_generated.cpp \
  $(FILTERS_DIR)/matlab.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)-matlab/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@

# The gpu object lifetime test needs the debug runtime
$(BIN_DIR)/$(TARGET)/generator_aot_gpu_object_lifetime: \
  $(ROOT_DIR)/test/generator/gpu_object_lifetime_aottest.cpp \
  $(FILTERS_DIR)/gpu_object_lifetime.a \
  $(FILTERS_DIR)/gpu_object_lifetime.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)-debug/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@

# acquire_release explicitly uses CUDA/OpenCL APIs, so link those here.
$(BIN_DIR)/$(TARGET)/generator_aot_acquire_release: \
  $(ROOT_DIR)/test/generator/acquire_release_aottest.cpp \
  $(FILTERS_DIR)/acquire_release.a \
  $(FILTERS_DIR)/acquire_release.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@

$(BIN_DIR)/$(TARGET)/generator_aotcpp_acquire_release: \
  $(ROOT_DIR)/test/generator/acquire_release_aottest.cpp \
  $(FILTERS_DIR)/acquire_release.halide_generated.cpp \
  $(FILTERS_DIR)/acquire_release.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@

# define_extern_opencl explicitly uses OpenCL APIs, so link those here.
# AOT (static library) flavour of the define_extern_opencl test.
$(BIN_DIR)/$(TARGET)/generator_aot_define_extern_opencl: \
  $(ROOT_DIR)/test/generator/define_extern_opencl_aottest.cpp \
  $(FILTERS_DIR)/define_extern_opencl.a \
  $(FILTERS_DIR)/define_extern_opencl.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) -o $@

# C++-backend flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_define_extern_opencl: \
  $(ROOT_DIR)/test/generator/define_extern_opencl_aottest.cpp \
  $(FILTERS_DIR)/define_extern_opencl.halide_generated.cpp \
  $(FILTERS_DIR)/define_extern_opencl.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		$(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) -o $@

# By default, %_jittest.cpp depends on libHalide, plus the stubs for the
# Generator. These are external tests that use the JIT.
$(BIN_DIR)/generator_jit_%: \
  $(ROOT_DIR)/test/generator/%_jittest.cpp \
  $(BIN_DIR)/libHalide.$(SHARED_EXT) \
  $(INCLUDE_DIR)/Halide.h \
  $(FILTERS_DIR)/%.stub.h \
  $(BUILD_DIR)/%_generator.o
	@mkdir -p $(@D)
	$(CXX) -g $(TEST_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) \
		-I$(INCLUDE_DIR) -I$(FILTERS_DIR) -I $(ROOT_DIR)/apps/support \
		$(TEST_LD_FLAGS) -o $@

# generator_aot_multitarget is run multiple times, with different env vars.
generator_aot_multitarget: $(BIN_DIR)/$(TARGET)/generator_aot_multitarget
	@mkdir -p $(@D)
	HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=0 $(CURDIR)/$<
	HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=1 $(CURDIR)/$<
	@-echo

# nested externs doesn't actually contain a generator named
# "nested_externs", and has no internal tests in any case.
test_generator_nested_externs:
	@echo "Skipping"

# Object file for the RunGen driver. -g is stripped from the test flags and
# -Os is appended after $(OPTIMIZE).
$(BUILD_DIR)/RunGenMain.o: \
  $(ROOT_DIR)/tools/RunGenMain.cpp \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(ROOT_DIR)/tools/RunGen.h
	@mkdir -p $(@D)
	$(CXX) -c $< $(filter-out -g, $(TEST_CXX_FLAGS)) $(OPTIMIZE) -Os \
		$(IMAGE_IO_CXX_FLAGS) \
		-I$(INCLUDE_DIR) -I $(SRC_DIR)/runtime -I$(ROOT_DIR)/tools \
		-o $@

# Compile a generated registration source into an object file.
$(FILTERS_DIR)/%.registration.o: $(FILTERS_DIR)/%.registration.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< $(TEST_CXX_FLAGS) -o $@

# Link RunGenMain, the runtime, the filter and its registration object into a
# standalone <filter>.rungen executable. The registration object goes through
# $(call alwayslink,...) so the linker keeps it.
$(FILTERS_DIR)/%.rungen: \
  $(BUILD_DIR)/RunGenMain.o \
  $(BIN_DIR)/$(TARGET)/runtime.a \
  $(FILTERS_DIR)/%.registration.o \
  $(FILTERS_DIR)/%.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) \
		$< \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/$*.registration.o) \
		$(FILTERS_DIR)/$*.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Extra command-line arguments forwarded to a .rungen binary by the .run rule.
RUNARGS ?=

# "make <filters dir>/foo.run RUNARGS=..." executes foo.rungen.
$(FILTERS_DIR)/%.run: $(FILTERS_DIR)/%.rungen
	$(CURDIR)/$< $(RUNARGS)
	@-echo

# Same source as %.registration.o, but compiled with
# HALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC set to a per-filter function name.
$(FILTERS_DIR)/%.registration_extra.o: $(FILTERS_DIR)/%.registration.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< $(TEST_CXX_FLAGS) \
		-DHALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC=halide_register_extra_key_value_pairs_$* \
		-o $@

# Test the registration mechanism, independent of RunGen.
# Note that this depends on the registration_extra.o (rather than registration.o)
# because it compiles with HALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC defined.
# Links the registration test against three filters and their
# registration_extra objects. The link order below is deliberate: objects
# first, then the filter archives, then the runtime.
$(FILTERS_DIR)/registration_test: $(ROOT_DIR)/test/generator/registration_test.cpp \
  $(BIN_DIR)/$(TARGET)/runtime.a \
  $(FILTERS_DIR)/blur2x2.registration_extra.o $(FILTERS_DIR)/blur2x2.a \
  $(FILTERS_DIR)/cxx_mangling.registration_extra.o $(FILTERS_DIR)/cxx_mangling.a \
  $(FILTERS_DIR)/pyramid.registration_extra.o $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(GEN_AOT_INCLUDES) \
		$(ROOT_DIR)/test/generator/registration_test.cpp \
		$(FILTERS_DIR)/blur2x2.registration_extra.o \
		$(FILTERS_DIR)/cxx_mangling.registration_extra.o \
		$(FILTERS_DIR)/pyramid.registration_extra.o \
		$(FILTERS_DIR)/blur2x2.a \
		$(FILTERS_DIR)/cxx_mangling.a \
		$(FILTERS_DIR)/pyramid.a \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Test RunGen itself
$(FILTERS_DIR)/rungen_test: $(ROOT_DIR)/test/generator/rungen_test.cpp \
  $(BIN_DIR)/$(TARGET)/runtime.a \
  $(FILTERS_DIR)/example.registration.o \
  $(FILTERS_DIR)/example.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(GEN_AOT_INCLUDES) \
		$(ROOT_DIR)/test/generator/rungen_test.cpp \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/example.registration.o) \
		$(FILTERS_DIR)/example.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Test linking multiple filters into a single RunGen instance
$(FILTERS_DIR)/multi_rungen: $(BUILD_DIR)/RunGenMain.o $(BIN_DIR)/$(TARGET)/runtime.a \
  $(FILTERS_DIR)/blur2x2.registration.o $(FILTERS_DIR)/blur2x2.a \
  $(FILTERS_DIR)/cxx_mangling.registration.o $(FILTERS_DIR)/cxx_mangling.a \
  $(FILTERS_DIR)/pyramid.registration.o $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) \
		$(BUILD_DIR)/RunGenMain.o \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/blur2x2.registration.o) \
		$(call alwayslink,$(FILTERS_DIR)/cxx_mangling.registration.o) \
		$(call alwayslink,$(FILTERS_DIR)/pyramid.registration.o) \
		$(FILTERS_DIR)/blur2x2.a \
		$(FILTERS_DIR)/cxx_mangling.a \
		$(FILTERS_DIR)/pyramid.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Test concatenating multiple registration files as well, which should also work
$(FILTERS_DIR)/multi_rungen2.registration.cpp: $(FILTERS_DIR)/blur2x2.registration.cpp $(FILTERS_DIR)/cxx_mangling.registration.cpp $(FILTERS_DIR)/pyramid.registration.cpp
	cat $^ > $@

# All prerequisites (objects, the concatenated registration source and the
# filter archives) are passed to the compiler in prerequisite order via $^.
$(FILTERS_DIR)/multi_rungen2: $(BUILD_DIR)/RunGenMain.o $(BIN_DIR)/$(TARGET)/runtime.a \
  $(FILTERS_DIR)/multi_rungen2.registration.cpp \
  $(FILTERS_DIR)/blur2x2.a \
  $(FILTERS_DIR)/cxx_mangling.a \
  $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) $^ $(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Build a tutorial binary from tutorial/<stem>.cpp.
#
# Targets whose name ends in "_run" are the second half of a two-part lesson:
# the matching "..._generate" tutorial target is run first via a sub-make, and
# the runner is then linked against the $(TMP_DIR)/<lesson>_*.a it produced
# (<lesson> being the first 9 characters of the stem). All other tutorials are
# ordinary JIT programs linked with $(TEST_LD_FLAGS).
#
# The steps of the "_run" branch are chained with && so that a failing
# generator step aborts the recipe instead of silently linking against stale
# archives left over in $(TMP_DIR).
#
# NOTE: the sub-make is deliberately spelled "make" rather than $(MAKE).
# GNU make executes any recipe line containing $(MAKE) even under "make -n",
# and because this whole if/else is a single logical line the compile step
# would then run during a dry run as well.
$(BIN_DIR)/tutorial_%: $(ROOT_DIR)/tutorial/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h
	@ if [[ $@ == *_run ]]; then \
		export TUTORIAL=$* && \
		export LESSON=`echo $${TUTORIAL} | cut -b1-9` && \
		make -f $(THIS_MAKEFILE) tutorial_$${TUTORIAL/run/generate} && \
		$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(TMP_DIR) -I$(INCLUDE_DIR) $(TMP_DIR)/$${LESSON}_*.a $(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -lz -o $@; \
	else \
		$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@;\
	fi

# Lesson 15 is a generator executable, so it additionally links GenGen.o.
$(BIN_DIR)/tutorial_lesson_15_generators: $(ROOT_DIR)/tutorial/lesson_15_generators.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Run lesson 15: copy the generator binary into $(TMP_DIR) under the name the
# usage script expects, then source the script from there with $(BIN_DIR)
# appended to PATH. Steps are chained with && so a failed cp/cd stops the run.
tutorial_lesson_15_generators: $(ROOT_DIR)/tutorial/lesson_15_generators_usage.sh $(BIN_DIR)/tutorial_lesson_15_generators
	@-mkdir -p $(TMP_DIR)
	cp $(BIN_DIR)/tutorial_lesson_15_generators $(TMP_DIR)/lesson_15_generate && \
	cd $(TMP_DIR) && \
	PATH="$${PATH}:$(CURDIR)/$(BIN_DIR)" source $(ROOT_DIR)/tutorial/lesson_15_generators_usage.sh
	@-echo

$(BIN_DIR)/tutorial_lesson_16_rgb_generate: $(ROOT_DIR)/tutorial/lesson_16_rgb_generate.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Only the first (planar) variant is generated with a runtime; the others use
# -no_runtime so the brighten_*.a archives can be linked together.
$(BIN_DIR)/tutorial_lesson_16_rgb_run: $(ROOT_DIR)/tutorial/lesson_16_rgb_run.cpp $(BIN_DIR)/tutorial_lesson_16_rgb_generate
	@-mkdir -p $(TMP_DIR)
	# Run the generator
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_planar target=host layout=planar
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_interleaved target=host-no_runtime layout=interleaved
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_either target=host-no_runtime layout=either
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_specialized target=host-no_runtime layout=specialized
	# Compile the runner
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
	-I$(INCLUDE_DIR) -L$(BIN_DIR) -I $(TMP_DIR) $(TMP_DIR)/brighten_*.a \
	-lHalide $(TEST_LD_FLAGS) $(COMMON_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
	@-echo

$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate: $(ROOT_DIR)/tutorial/lesson_21_auto_scheduler_generate.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@

# The values in MachineParams are:
# - the maximum level of parallelism available,
# - the size of the last-level cache (in bytes),
# - the ratio between the cost of a miss at the last level cache and the cost
#   of arithmetic on the target architecture
# ...in that order.
LESSON_21_MACHINE_PARAMS = 32,16777216,40

$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_run: $(ROOT_DIR)/tutorial/lesson_21_auto_scheduler_run.cpp $(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate
	@-mkdir -p $(TMP_DIR)
	# Run the generator
	$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate -g auto_schedule_gen -o $(TMP_DIR) -e static_library,c_header,schedule -f auto_schedule_false target=host auto_schedule=false
	$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate -g auto_schedule_gen -o $(TMP_DIR) -e static_library,c_header,schedule -f auto_schedule_true target=host-no_runtime auto_schedule=true machine_params=$(LESSON_21_MACHINE_PARAMS)
	# Compile the runner
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
	-I$(INCLUDE_DIR) -L$(BIN_DIR) -I $(TMP_DIR) $(TMP_DIR)/auto_schedule_*.a \
	-lHalide $(TEST_LD_FLAGS) $(COMMON_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
	@-echo

# ---------------------------------------------------------------------------
# Test runners. Each one executes an already-built binary from inside
# $(TMP_DIR). "cd ... &&" is used (rather than ";") so the binary is never
# run from the wrong directory if the cd fails.
# ---------------------------------------------------------------------------

test_internal: $(BIN_DIR)/test_internal
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

correctness_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

correctness_opencl_runtime: $(BIN_DIR)/$(TARGET)/correctness_opencl_runtime
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

# Prints a single "." on success; on failure dumps the captured stdout/stderr
# of the test and fails the target.
quiet_correctness_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	@cd $(TMP_DIR) && ( $(CURDIR)/$< 2>stderr_$*.txt > stdout_$*.txt && echo -n . ) || ( echo ; echo FAILED TEST: $* ; cat stdout_$*.txt stderr_$*.txt ; false )

valgrind_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && valgrind --error-exitcode=-1 $(CURDIR)/$<
	@-echo

# Use Intel SDE to emulate an avx 512 processor.
avx512_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && sde -cnl -- $(CURDIR)/$<
	cd $(TMP_DIR) && sde -knl -- $(CURDIR)/$<
	@-echo

# This test is *supposed* to do an out-of-bounds read, so skip it when testing under valgrind
valgrind_tracing_stack: $(BIN_DIR)/correctness_tracing_stack
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$(BIN_DIR)/correctness_tracing_stack
	@-echo

performance_%: $(BIN_DIR)/performance_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

# An error test passes when its combined output contains one of the expected
# failure markers.
error_%: $(BIN_DIR)/error_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1 | egrep --q "terminating with uncaught exception|^terminate called|^Error|Assertion.*failed"
	@-echo

# A warning test passes when its combined output has a line starting with "Warning".
warning_%: $(BIN_DIR)/warning_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1 | egrep --q "^Warning"
	@-echo

opengl_%: $(BIN_DIR)/opengl_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1
	@-echo

generator_jit_%: $(BIN_DIR)/generator_jit_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

generator_aot_%: $(BIN_DIR)/$(TARGET)/generator_aot_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

generator_aotcpp_%: $(BIN_DIR)/$(TARGET)/generator_aotcpp_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

# Tutorial input images are copied next to where the tutorials run.
$(TMP_DIR)/images/%.png: $(ROOT_DIR)/tutorial/images/%.png
	@-mkdir -p $(TMP_DIR)/images
	cp $< $(TMP_DIR)/images/

tutorial_%: $(BIN_DIR)/tutorial_% $(TMP_DIR)/images/rgb.png $(TMP_DIR)/images/gray.png
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

auto_schedule_%: $(BIN_DIR)/auto_schedule_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

# Each time_compilation_* target wraps a sub-make of the corresponding
# test/build target in $(TIME_COMPILATION), passing the csv file to record into.
time_compilation_test_%: $(BIN_DIR)/test_%
	$(TIME_COMPILATION) compile_times_correctness.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_test_%=test_%)

time_compilation_performance_%: $(BIN_DIR)/performance_%
	$(TIME_COMPILATION) compile_times_performance.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_performance_%=performance_%)

time_compilation_opengl_%: $(BIN_DIR)/opengl_%
	$(TIME_COMPILATION) compile_times_opengl.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_opengl_%=opengl_%)

time_compilation_generator_%: $(BIN_DIR)/%.generator
	$(TIME_COMPILATION) compile_times_generator.csv make -f $(THIS_MAKEFILE) $(@:time_compilation_generator_%=$(FILTERS_DIR)/%.a)

# Apps that are built by build_apps and exercised by test_apps.
TEST_APPS=\
	HelloMatlab \
	autoscheduler \
	bilateral_grid \
	bgu \
	blur \
	c_backend \
	camera_pipe \
	conv_layer \
	fft \
	gradient_autoscheduler \
	hist \
	interpolate \
	lens_blur \
	linear_algebra \
	local_laplacian \
	max_filter \
	nl_means \
	onnx \
	resize \
	resnet_50 \
	stencil_chain \
	wavelet

# One <app>_test_app and one <app>_build_app target per entry in TEST_APPS.
TEST_APPS_DEPS=$(TEST_APPS:%=%_test_app)
BUILD_APPS_DEPS=$(TEST_APPS:%=%_build_app)

# Build a single app by delegating to the "build" target of its own Makefile.
$(BUILD_APPS_DEPS): distrib build_python_bindings
	@echo Building app $(@:%_build_app=%) for ${HL_TARGET}...
	@$(MAKE) -C $(ROOT_DIR)/apps/$(@:%_build_app=%) build \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$(@:%_build_app=%)/bin \
		HL_TARGET=$(HL_TARGET) \
		|| exit 1

# Test a single app by delegating to the "test" target of its own Makefile.
$(TEST_APPS_DEPS): distrib build_python_bindings
	@echo Testing app $(@:%_test_app=%) for ${HL_TARGET}...
	@$(MAKE) -C $(ROOT_DIR)/apps/$(@:%_test_app=%) test \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$(@:%_test_app=%)/bin \
		HL_TARGET=$(HL_TARGET) \
		|| exit 1

# None of these names correspond to files, so all of them (including the
# per-app test targets) are declared phony.
.PHONY: test_apps build_apps $(BUILD_APPS_DEPS) $(TEST_APPS_DEPS)
build_apps: $(BUILD_APPS_DEPS)

# The apps are built first (possibly in parallel), then tested in a -j1 sub-make.
test_apps: $(BUILD_APPS_DEPS)
	$(MAKE) -f $(THIS_MAKEFILE) -j1 $(TEST_APPS_DEPS)

BENCHMARK_APPS=\
	bilateral_grid \
	camera_pipe \
	lens_blur \
	local_laplacian \
	nl_means \
	stencil_chain

# Build the .rungen binary of one benchmark app; sub-make stdout is discarded.
$(BENCHMARK_APPS): distrib build_python_bindings
	@echo Building $@ for ${HL_TARGET}...
	@$(MAKE) -C $(ROOT_DIR)/apps/$@ \
		$(CURDIR)/$(BIN_DIR)/apps/$@/bin/$(HL_TARGET)/$@.rungen \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$@/bin \
		HL_TARGET=$(HL_TARGET) \
		> /dev/null \
		|| exit 1

# Run the <app>.benchmark target of every benchmark app, one after another.
# $(MAKE) (not a literal "make") is used so the sub-make is the same make
# binary and inherits the parent's flags, matching the other rules here.
.PHONY: benchmark_apps $(BENCHMARK_APPS)
benchmark_apps: $(BENCHMARK_APPS)
	@for APP in $(BENCHMARK_APPS); do \
		echo ;\
		echo Benchmarking $${APP} for ${HL_TARGET}... ; \
		$(MAKE) -C $(ROOT_DIR)/apps/$${APP} \
			$${APP}.benchmark \
			HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
			HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
			BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$${APP}/bin \
			HL_TARGET=$(HL_TARGET) \
			|| exit 1 ; \
	done

# TODO(srj): the python bindings need to be put into the distrib folders;
# this is a hopefully-temporary workaround (https://github.com/halide/Halide/issues/4368)
.PHONY: build_python_bindings
build_python_bindings: distrib $(BIN_DIR)/host/runtime.a
	$(MAKE) -C $(ROOT_DIR)/python_bindings \
		-f $(ROOT_DIR)/python_bindings/Makefile \
		build_python_bindings \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		BIN=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		PYTHON=$(PYTHON) \
		OPTIMIZE=$(OPTIMIZE)

.PHONY: test_python
test_python: distrib $(BIN_DIR)/host/runtime.a build_python_bindings
	$(MAKE) -C $(ROOT_DIR)/python_bindings \
		-f $(ROOT_DIR)/python_bindings/Makefile \
		test \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		BIN=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		PYTHON=$(PYTHON) \
		OPTIMIZE=$(OPTIMIZE)

# It's just for compiling the runtime, so earlier clangs *might* work,
# but best to peg it to the minimum llvm version.
# clang releases whose "clang version X.Y" banner is accepted. CLANG_OK is only
# ever set to yes here, so a value supplied by the user (CLANG_OK=y) survives.
CLANG_SUPPORTED_VERSIONS = 3.7 3.8 4.0 5.0 6.0 7.0 7.1 8.0 9.0 10.0 11.0 12.0

# foreach joins its results with spaces, hence the $(strip) before testing
# for "no match at all".
ifneq (,$(strip $(foreach V,$(CLANG_SUPPORTED_VERSIONS),$(findstring clang version $(V),$(CLANG_VERSION)))))
CLANG_OK=yes
endif

# Apple's toolchain reports its own banner instead of "clang version".
ifneq (,$(findstring Apple LLVM version 5.0,$(CLANG_VERSION)))
CLANG_OK=yes
endif

# $(BUILD_DIR)/clang_ok is a stamp file: it is created when the check passes,
# otherwise the rule prints diagnostics and fails.
ifneq ($(CLANG_OK), )
$(BUILD_DIR)/clang_ok:
	@echo "Found a new enough version of clang"
	mkdir -p $(BUILD_DIR)
	touch $(BUILD_DIR)/clang_ok
else
$(BUILD_DIR)/clang_ok:
	@echo "Can't find clang or version of clang too old (we need 3.7 or greater):"
	@echo "You can override this check by setting CLANG_OK=y"
	echo '$(CLANG_VERSION)'
	echo $(findstring version 3,$(CLANG_VERSION))
	echo $(findstring version 3.0,$(CLANG_VERSION))
	$(CLANG) --version
	@exit 1
endif

# Accept only the exact supported values. $(filter) compares whole words,
# whereas $(findstring) would also accept any substring of the list
# (e.g. "10" inside "100").
ifneq (,$(filter $(LLVM_VERSION_TIMES_10),90 100 110 120))
LLVM_OK=yes
endif

# Stamp file for the llvm version check; a passing check also requires rtti_ok.
ifneq ($(LLVM_OK), )
$(BUILD_DIR)/llvm_ok: $(BUILD_DIR)/rtti_ok
	@echo "Found a new enough version of llvm"
	mkdir -p $(BUILD_DIR)
	touch $(BUILD_DIR)/llvm_ok
else
$(BUILD_DIR)/llvm_ok:
	@echo "Can't find llvm or version of llvm too old (we need 9.0 or greater):"
	@echo "You can override this check by setting LLVM_OK=y"
	$(LLVM_CONFIG) --version
	@exit 1
endif

# RTTI is a problem only when Halide asks for it (WITH_RTTI non-empty) while
# LLVM_HAS_NO_RTTI is set. Comments are kept on their own lines so that no
# trailing whitespace ends up in the value of RTTI_OK.
ifneq ($(WITH_RTTI), )
ifeq ($(LLVM_HAS_NO_RTTI), )
# Enabled in Halide and LLVM
RTTI_OK=yes
endif
else
# Not requested for Halide, so there is nothing to check
RTTI_OK=yes
endif

# Stamp file for the RTTI check.
ifneq ($(RTTI_OK), )
$(BUILD_DIR)/rtti_ok:
	mkdir -p $(BUILD_DIR)
	touch $(BUILD_DIR)/rtti_ok
else
$(BUILD_DIR)/rtti_ok:
	@echo "Can't enable RTTI - llvm was compiled without it."
	@echo "LLVM c++ flags: " $(LLVM_CXX_FLAGS)
	@exit 1
endif

# Copy the libraries, public headers, tutorial material and helper tools
# under $(PREFIX). On Darwin the installed shared library's install name is
# rewritten to its final location.
install: $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(RUNTIME_EXPORTED_INCLUDES)
	mkdir -p $(PREFIX)/include $(PREFIX)/bin $(PREFIX)/lib $(PREFIX)/share/halide/tutorial/images $(PREFIX)/share/halide/tools $(PREFIX)/share/halide/tutorial/figures
	cp $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(PREFIX)/lib
	cp $(INCLUDE_DIR)/Halide.h $(PREFIX)/include
	cp $(INCLUDE_DIR)/HalideBuffer.h $(PREFIX)/include
	cp $(INCLUDE_DIR)/HalideRuntim*.h $(PREFIX)/include
	cp $(ROOT_DIR)/tutorial/images/*.png $(PREFIX)/share/halide/tutorial/images
	cp $(ROOT_DIR)/tutorial/figures/*.gif $(PREFIX)/share/halide/tutorial/figures
	cp $(ROOT_DIR)/tutorial/figures/*.jpg $(PREFIX)/share/halide/tutorial/figures
	cp $(ROOT_DIR)/tutorial/figures/*.mp4 $(PREFIX)/share/halide/tutorial/figures
	cp $(ROOT_DIR)/tutorial/*.cpp $(PREFIX)/share/halide/tutorial
	cp $(ROOT_DIR)/tutorial/*.h $(PREFIX)/share/halide/tutorial
	cp $(ROOT_DIR)/tutorial/*.sh $(PREFIX)/share/halide/tutorial
	cp $(ROOT_DIR)/tools/mex_halide.m $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/GenGen.cpp $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/RunGen.h $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/RunGenMain.cpp $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/halide_image.h $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/halide_image_io.h $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/halide_image_info.h $(PREFIX)/share/halide/tools
	cp $(ROOT_DIR)/tools/halide_malloc_trace.h $(PREFIX)/share/halide/tools
ifeq ($(UNAME), Darwin)
	install_name_tool -id $(PREFIX)/lib/libHalide.$(SHARED_EXT) $(PREFIX)/lib/libHalide.$(SHARED_EXT)
endif

# This is a specialized 'install' for users who need Hexagon support libraries as well.
# The symlinks expose selected v62 files (and GenGen.cpp) at additional paths
# under $(PREFIX).
install_qc: install $(HEXAGON_RUNTIME_LIBS)
	mkdir -p $(PREFIX)/lib/arm-32-android $(PREFIX)/lib/arm-64-android $(PREFIX)/lib/host $(PREFIX)/lib/v62 $(PREFIX)/tools
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/arm-32-android/* $(PREFIX)/lib/arm-32-android
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/arm-64-android/* $(PREFIX)/lib/arm-64-android
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/host/* $(PREFIX)/lib/host
	cp -r $(HEXAGON_RUNTIME_LIBS_DIR)/v62/* $(PREFIX)/lib/v62
	ln -sf $(PREFIX)/share/halide/tools/GenGen.cpp $(PREFIX)/tools/GenGen.cpp
	ln -sf $(PREFIX)/lib/v62/hexagon_sim_remote $(PREFIX)/bin/hexagon_sim_remote
	ln -sf $(PREFIX)/lib/v62/libsim_qurt.a $(PREFIX)/lib/libsim_qurt.a
	ln -sf $(PREFIX)/lib/v62/libsim_qurt_vtcm.a $(PREFIX)/lib/libsim_qurt_vtcm.a

# We need to capture the system libraries that we'll need to link
# against, so that downstream consumers of our build rules don't
# have to guess what's necessary on their system; call
# llvm-config and capture the result in config files that
# we include in our distribution.
# 1 when WITH_RTTI is non-empty, 0 otherwise; substituted into the config templates.
HALIDE_RTTI_RAW=$(if $(WITH_RTTI),1,0)

# Instantiate tools/halide_config.<ext>.tpl by filling in the three @...@
# placeholders. The CXX-flags expression uses ';' as the sed delimiter.
$(BUILD_DIR)/halide_config.%: $(ROOT_DIR)/tools/halide_config.%.tpl
	@mkdir -p $(@D)
	sed -e 's/@HALIDE_SYSTEM_LIBS_RAW@/${LLVM_SYSTEM_LIBS}/g' \
	    -e 's/@HALIDE_RTTI_RAW@/${HALIDE_RTTI_RAW}/g' \
	    -e 's;@HALIDE_LLVM_CXX_FLAGS_RAW@;${LLVM_CXX_FLAGS};g' \
	    $< > $@


# Assemble the distribution tree from scratch in $(DISTRIB_DIR) and pack it
# into halide.tgz. A temporary "halide" symlink to $(DISTRIB_DIR) is created so
# that every path inside the tarball starts with "halide/"; the tarball is
# written to $(BUILD_DIR) first and moved into $(DISTRIB_DIR) at the end.
$(DISTRIB_DIR)/halide.tgz: \
  $(LIB_DIR)/libHalide.a \
  $(BIN_DIR)/libHalide.$(SHARED_EXT) \
  $(INCLUDE_DIR)/Halide.h \
  $(RUNTIME_EXPORTED_INCLUDES) \
  $(ROOT_DIR)/README*.md \
  $(BUILD_DIR)/halide_config.cmake \
  $(BUILD_DIR)/halide_config.make
	rm -rf $(DISTRIB_DIR)
	mkdir -p \
		$(DISTRIB_DIR)/include \
		$(DISTRIB_DIR)/bin \
		$(DISTRIB_DIR)/lib \
		$(DISTRIB_DIR)/tutorial \
		$(DISTRIB_DIR)/tutorial/images \
		$(DISTRIB_DIR)/tools \
		$(DISTRIB_DIR)/tutorial/figures
	cp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(DISTRIB_DIR)/bin
	cp $(LIB_DIR)/libHalide.a $(DISTRIB_DIR)/lib
	cp $(INCLUDE_DIR)/Halide.h \
	   $(INCLUDE_DIR)/HalideBuffer.h \
	   $(INCLUDE_DIR)/HalideRuntim*.h \
	   $(INCLUDE_DIR)/HalidePyTorch*.h \
	   $(DISTRIB_DIR)/include
	cp $(ROOT_DIR)/tutorial/images/*.png $(DISTRIB_DIR)/tutorial/images
	cp $(ROOT_DIR)/tutorial/figures/*.gif \
	   $(ROOT_DIR)/tutorial/figures/*.jpg \
	   $(ROOT_DIR)/tutorial/figures/*.mp4 \
	   $(DISTRIB_DIR)/tutorial/figures
	cp $(ROOT_DIR)/tutorial/*.cpp \
	   $(ROOT_DIR)/tutorial/*.h \
	   $(ROOT_DIR)/tutorial/*.sh \
	   $(DISTRIB_DIR)/tutorial
	cp $(ROOT_DIR)/tools/mex_halide.m \
	   $(ROOT_DIR)/tools/GenGen.cpp \
	   $(ROOT_DIR)/tools/RunGen.h \
	   $(ROOT_DIR)/tools/RunGenMain.cpp \
	   $(ROOT_DIR)/tools/halide_benchmark.h \
	   $(ROOT_DIR)/tools/halide_image.h \
	   $(ROOT_DIR)/tools/halide_image_io.h \
	   $(ROOT_DIR)/tools/halide_image_info.h \
	   $(ROOT_DIR)/tools/halide_malloc_trace.h \
	   $(ROOT_DIR)/tools/halide_trace_config.h \
	   $(DISTRIB_DIR)/tools
	cp $(ROOT_DIR)/README*.md $(DISTRIB_DIR)
	cp $(BUILD_DIR)/halide_config.* $(DISTRIB_DIR)
	ln -sf $(DISTRIB_DIR) halide
	tar -czf $(BUILD_DIR)/halide.tgz \
		halide/bin \
		halide/lib \
		halide/include \
		halide/tools \
		halide/tutorial \
		halide/README*.md \
		halide/halide_config.*
	rm -rf halide
	mv $(BUILD_DIR)/halide.tgz $(DISTRIB_DIR)/halide.tgz


# Convenience alias for building the distribution tarball.
.PHONY: distrib
distrib: $(DISTRIB_DIR)/halide.tgz

# Trace utilities: only the .cpp prerequisites are compiled; the headers are
# listed so that edits to them trigger a rebuild.
$(BIN_DIR)/HalideTraceViz: \
  $(ROOT_DIR)/util/HalideTraceViz.cpp \
  $(INCLUDE_DIR)/HalideRuntime.h \
  $(ROOT_DIR)/tools/halide_image_io.h \
  $(ROOT_DIR)/tools/halide_trace_config.h
	$(CXX) $(OPTIMIZE) -std=c++11 $(filter %.cpp,$^) \
		-I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools -L$(BIN_DIR) -o $@

$(BIN_DIR)/HalideTraceDump: \
  $(ROOT_DIR)/util/HalideTraceDump.cpp \
  $(ROOT_DIR)/util/HalideTraceUtils.cpp \
  $(INCLUDE_DIR)/HalideRuntime.h \
  $(ROOT_DIR)/tools/halide_image_io.h
	$(CXX) $(OPTIMIZE) -std=c++11 $(filter %.cpp,$^) \
		-I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools -I$(ROOT_DIR)/src/runtime -L$(BIN_DIR) \
		$(IMAGE_IO_CXX_FLAGS) $(IMAGE_IO_LIBS) -o $@

# Run clang-format on most of the source. The tutorials directory is
# explicitly skipped, as those files are manually formatted to
# maximize readability. NB: clang-format is *not* stable across versions;
# we are currently standardized on the formatting from clang-format-10.
# If CLANG_FORMAT points to a different version, you may get incorrectly-formatted code.
# clang-format binary used by the "format" target; defaults to the one that
# sits next to $(CLANG).
CLANG_FORMAT ?= ${CLANG}-format

# Reformat every .cpp/.h/.c file under the listed directories in place.
# The -name patterns are quoted so the shell hands them to find verbatim
# instead of expanding them against files in the current directory.
.PHONY: format
format:
	find "${ROOT_DIR}/apps" "${ROOT_DIR}/src" "${ROOT_DIR}/tools" "${ROOT_DIR}/test" "${ROOT_DIR}/util" "${ROOT_DIR}/python_bindings" -name '*.cpp' -o -name '*.h' -o -name '*.c' | xargs ${CLANG_FORMAT} -i -style=file

# run-clang-tidy.py is a script that comes with LLVM for running clang
# tidy in parallel. Assume it's in the standard install path relative to clang.
RUN_CLANG_TIDY ?= $(shell dirname $(CLANG))/../share/clang/run-clang-tidy.py

# Run clang-tidy on everything in src/. In future we may increase this
# surface. Not doing it for now because things outside src are not
# performance-critical.
CLANG_TIDY_TARGETS= $(addprefix $(SRC_DIR)/,$(SOURCE_FILES))

INVOKE_CLANG_TIDY ?= $(RUN_CLANG_TIDY) -p $(BUILD_DIR) $(CLANG_TIDY_TARGETS) -clang-tidy-binary $(CLANG)-tidy -clang-apply-replacements-binary $(CLANG)-apply-replacements -quiet

# Generate a compilation database with one entry per file in $(SOURCE_FILES),
# followed by a sentinel entry (compiling /dev/null) whose only job is to
# close the JSON array without a trailing comma.
#
# The whole file is produced by a single shell invocation: each recipe line
# runs in its own shell, so the BD/SD variables would otherwise be unset by
# the time the sentinel is written. Output goes to a temporary file that is
# renamed into place, so a failed or interrupted run never leaves a partial
# (or doubly-appended) database behind.
$(BUILD_DIR)/compile_commands.json:
	mkdir -p $(BUILD_DIR)
	BD=$$(realpath $(BUILD_DIR)); \
	SD=$$(realpath $(SRC_DIR)); \
	{ \
	  echo '['; \
	  for S in $(SOURCE_FILES); do \
	    echo "{ \"directory\": \"$${BD}\","; \
	    echo "  \"command\": \"$(CXX) $(CXX_FLAGS) -c $${SD}/$${S} -o /dev/null\","; \
	    echo "  \"file\": \"$${SD}/$${S}\" },"; \
	  done; \
	  echo "{ \"directory\": \"$${BD}\","; \
	  echo "  \"command\": \"$(CXX) -c /dev/null -o /dev/null\","; \
	  echo "  \"file\": \"/dev/null\" }]"; \
	} > $@.tmp && mv -f $@.tmp $@

# Report clang-tidy findings; the two greps drop the "warnings generated"
# lines and the echoed clang-tidy command lines from the output.
.PHONY: clang-tidy
clang-tidy: $(BUILD_DIR)/compile_commands.json
	@$(INVOKE_CLANG_TIDY) 2>&1 | grep -v "warnings generated" | grep -v '^$(CLANG)-tidy '

# Same as clang-tidy, but also passes -fix.
.PHONY: clang-tidy-fix
clang-tidy-fix: $(BUILD_DIR)/compile_commands.json
	@$(INVOKE_CLANG_TIDY) -fix 2>&1 | grep -v "warnings generated" | grep -v '^$(CLANG)-tidy '

# Build the documentation. Be sure to keep this synchronized with doc/CMakeLists.txt
# if you choose to edit it.

# Copy ROOT_DIR to keep the following Doxyfile closer to CMake
Halide_SOURCE_DIR=${ROOT_DIR}

define Doxyfile
# Keep the following in sync with doc/CMakeLists.txt
ALPHABETICAL_INDEX = NO
BUILTIN_STL_SUPPORT = YES
CASE_SENSE_NAMES = NO
CLASS_DIAGRAMS = NO
DISTRIBUTE_GROUP_DOC = YES
EXAMPLE_PATH = "${Halide_SOURCE_DIR}/tutorial"
EXCLUDE = bin
EXTRACT_ALL = YES
EXTRACT_LOCAL_CLASSES = NO
FILE_PATTERNS = *.h
GENERATE_TREEVIEW = YES
HIDE_FRIEND_COMPOUNDS = YES
HIDE_IN_BODY_DOCS = YES
HIDE_UNDOC_CLASSES = YES
HIDE_UNDOC_MEMBERS = YES
JAVADOC_AUTOBRIEF = YES
QT_AUTOBRIEF = YES
QUIET = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
SORT_BY_SCOPE_NAME = YES
SORT_MEMBER_DOCS = NO
SOURCE_BROWSER = YES
STRIP_CODE_COMMENTS = NO

# Makefile-specific options
GENERATE_LATEX = NO
HAVE_DOT = NO
HTML_OUTPUT = .
INPUT = "${Halide_SOURCE_DIR}/src" "${Halide_SOURCE_DIR}/test"
OUTPUT_DIRECTORY = ${DOC_DIR}
PROJECT_NAME = Halide
endef

# Make the above Doxyfile variable available to the doc target.
export Doxyfile

# Write the exported Doxyfile text to $(TMP_DIR)/Doxyfile and run doxygen on it.
.PHONY: doc
doc:
	@-mkdir -p $(TMP_DIR)
	echo "$$Doxyfile" > $(TMP_DIR)/Doxyfile
	@-mkdir -p ${DOC_DIR}
	doxygen $(TMP_DIR)/Doxyfile