1# 'make' builds libHalide.a, the internal test suite, and runs the internal test suite
2# 'make run_tests' builds and runs all the end-to-end tests in the test subdirectory
3# 'make {error,performance}_foo' builds and runs test/{...}/foo.cpp for any
4#     c_source file in the corresponding subdirectory of the test folder
5# 'make correctness_foo' builds and runs test/correctness/foo.cpp for any
6#     c_source file in the correctness/ subdirectory of the test folder
7# 'make test_apps' checks some of the apps build and run (but does not check their output)
# 'make time_compilation_tests' records the compile time for each test module into a csv file.
#     For correctness and performance tests this includes Halide build time and run time. For
#     the tests in test/generator/ this times only the Halide build time.
11
# Disable built-in makefile rules for all apps to avoid pointless file-system
# scanning and general weirdness resulting from implicit rules.
MAKEFLAGS += --no-builtin-rules
.SUFFIXES:

# Name printed by `uname` (compared against Linux / Darwin below).
# Simply expanded so `uname` is run once rather than on every reference.
UNAME := $(shell uname)
18
# Per-platform basics: common linker flags, the PIC flag and the extension
# used for shared libraries. Builds with OS=Windows_NT are rejected.
# NOTE: lines inside these conditionals are indented with spaces, never a
# TAB, so make can never mistake them for recipe lines.
ifeq ($(OS), Windows_NT)
    $(error Halide no longer supports the MinGW environment.)
else
    # let's assume "normal" UNIX such as linux
    COMMON_LD_FLAGS=$(LDFLAGS) -ldl -lpthread -lz
    FPIC=-fPIC
ifeq ($(UNAME), Darwin)
    SHARED_EXT=dylib
else
    SHARED_EXT=so
endif
endif
31
# Anything that we later run install_name_tool on must be linked with extra
# header padding so the tool has room to rewrite install names. Only set on
# Darwin; empty everywhere else.
INSTALL_NAME_TOOL_LD_FLAGS=
ifeq ($(UNAME), Darwin)
  INSTALL_NAME_TOOL_LD_FLAGS=-Wl,-headerpad_max_install_names
endif
39
# $(call alwayslink,<archive>) expands to the linker flags that pull in every
# object of <archive>, whether or not anything references it.
# Defined with '=' rather than 'define' so that no leading TAB becomes part
# of the expansion.
ifeq ($(UNAME), Darwin)
alwayslink = -Wl,-force_load,$(1)
else
alwayslink = -Wl,--whole-archive $(1) -Wl,--no-whole-archive
endif
49
# Toolchain selection and LLVM discovery.
#
# Everything obtained by running llvm-config is assigned with ':=' so that the
# tool is run once while the makefile is parsed. With recursive '=' the
# $(shell ...) would be re-executed on every expansion, i.e. once per compile
# command for the cxxflags.
SHELL = bash
CXX ?= g++
PREFIX ?= /usr/local
LLVM_CONFIG ?= llvm-config
LLVM_COMPONENTS := $(shell $(LLVM_CONFIG) --components)
# "major.minor" (only the first digit of the minor version is kept)
LLVM_VERSION := $(shell $(LLVM_CONFIG) --version | sed 's/\([0-9][0-9]*\)\.\([0-9]\).*/\1.\2/')

LLVM_FULL_VERSION := $(shell $(LLVM_CONFIG) --version)
# The sed filters turn backslashes into forward slashes and "X:" into "/X".
LLVM_BINDIR := $(shell $(LLVM_CONFIG) --bindir | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')
LLVM_LIBDIR := $(shell $(LLVM_CONFIG) --libdir | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')
# Apparently there is no llvm_config flag to get canonical paths to tools,
# so we'll just construct one relative to --src-root and hope that is stable everywhere.
LLVM_GIT_LLD_INCLUDE_DIR := $(shell $(LLVM_CONFIG) --src-root | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')/../lld/include
LLVM_SYSTEM_LIBS := $(shell ${LLVM_CONFIG} --system-libs --link-static | sed -e 's/[\/&]/\\&/g')
LLVM_AS = $(LLVM_BINDIR)/llvm-as
LLVM_NM = $(LLVM_BINDIR)/llvm-nm
# Raw (path-normalised) output of `llvm-config --cxxflags`, captured once.
LLVM_CONFIG_CXXFLAGS_RAW := $(shell $(LLVM_CONFIG) --cxxflags | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g;s/-D/ -D/g;s/-O/ -O/g')
# LLVM's own flags minus optimisation/debug/warning switches, plus the lld
# include directory. Kept recursive so later '+=' lines behave as before.
LLVM_CXX_FLAGS = -std=c++11  $(filter-out -O% -g -fomit-frame-pointer -pedantic -W% -W, $(LLVM_CONFIG_CXXFLAGS_RAW)) -I$(LLVM_GIT_LLD_INCLUDE_DIR)
OPTIMIZE ?= -O3
OPTIMIZE_FOR_BUILD_TIME ?= -O0

PYTHON ?= python3

CLANG ?= $(LLVM_BINDIR)/clang
# Left recursive on purpose: clang is only invoked if this is actually used.
CLANG_VERSION = $(shell $(CLANG) --version)
74
# Flags needed by the sanitizers requested through the Halide target strings.
# May be pre-seeded by the user; the matching switches are appended below.
SANITIZER_FLAGS ?=

# TODO: this is suboptimal hackery; we should really add the relevant
# support libs for the sanitizer(s) as weak symbols in Codegen_LLVM.
# (Note also that, in general, most Sanitizers work most reliably with an all-Clang
# build system.)

# "tsan" anywhere in HL_TARGET or HL_JIT_TARGET turns on ThreadSanitizer.
#
# Using TSAN with the JIT can produce false positives unless libHalide itself
# is also built with TSAN, which is why the flag is added to OPTIMIZE as well.
# That only helps after a clean build. (In general, most of the Sanitizers
# only work well when used in a completely clean environment.)
ifneq (,$(findstring tsan,$(HL_TARGET)$(HL_JIT_TARGET)))
SANITIZER_FLAGS += -fsanitize=thread
OPTIMIZE += -fsanitize=thread
endif

# "asan" anywhere in HL_TARGET or HL_JIT_TARGET turns on AddressSanitizer.
ifneq (,$(findstring asan,$(HL_TARGET)$(HL_JIT_TARGET)))
SANITIZER_FLAGS += -fsanitize=address
OPTIMIZE += -fsanitize=address
endif

# Whatever was selected must also reach the linker.
COMMON_LD_FLAGS += $(SANITIZER_FLAGS)
100
# LLVM version as the major number followed by one minor digit (9.0 -> 90),
# passed to the C++ sources as LLVM_VERSION. Simply expanded so llvm-config
# runs once here instead of once for every command line that uses the value.
LLVM_VERSION_TIMES_10 := $(shell $(LLVM_CONFIG) --version | sed 's/\([0-9][0-9]*\)\.\([0-9]\).*/\1\2/')

LLVM_CXX_FLAGS += -DLLVM_VERSION=$(LLVM_VERSION_TIMES_10)
104
# WITH_* switches are "empty" (off) or "non-empty" (on); they are not
# true/false values. To turn one off, edit this file, pass "WITH_FOO="
# (no value) on the make command line, or export an empty WITH_FOO in the
# environment.

# Code generators: on when the LLVM build lists the matching component.
WITH_AARCH64 ?= $(findstring aarch64, $(LLVM_COMPONENTS))
# AMDGPU target is WIP
WITH_AMDGPU ?= $(findstring amdgpu, $(LLVM_COMPONENTS))
WITH_ARM ?= $(findstring arm, $(LLVM_COMPONENTS))
WITH_HEXAGON ?= $(findstring hexagon, $(LLVM_COMPONENTS))
WITH_MIPS ?= $(findstring mips, $(LLVM_COMPONENTS))
WITH_NVPTX ?= $(findstring nvptx, $(LLVM_COMPONENTS))
WITH_POWERPC ?= $(findstring powerpc, $(LLVM_COMPONENTS))
WITH_RISCV ?= $(findstring riscv, $(LLVM_COMPONENTS))
WITH_X86 ?= $(findstring x86, $(LLVM_COMPONENTS))

# Features that are on unless explicitly disabled.
WITH_D3D12 ?= not-empty
WITH_INTROSPECTION ?= not-empty
WITH_LLVM_INSIDE_SHARED_LIBHALIDE ?= not-empty
WITH_METAL ?= not-empty
WITH_OPENCL ?= not-empty
WITH_OPENGL ?= not-empty

# Features that are off unless explicitly enabled.
WITH_EXCEPTIONS ?=

# If HL_TARGET or HL_JIT_TARGET aren't set, use host
HL_JIT_TARGET ?= host
HL_TARGET ?= host
131
# For every optional backend/feature FOO:
#   FOO_CXX_FLAGS        -DWITH_FOO when WITH_FOO is non-empty
#   FOO_LLVM_CONFIG_LIB  llvm-config component to link, when there is one

# --- preprocessor switches -------------------------------------------------
AARCH64_CXX_FLAGS=$(if $(WITH_AARCH64), -DWITH_AARCH64, )
AMDGPU_CXX_FLAGS=$(if $(WITH_AMDGPU), -DWITH_AMDGPU, )
ARM_CXX_FLAGS=$(if $(WITH_ARM), -DWITH_ARM, )
D3D12_CXX_FLAGS=$(if $(WITH_D3D12), -DWITH_D3D12, )
EXCEPTIONS_CXX_FLAGS=$(if $(WITH_EXCEPTIONS), -DHALIDE_WITH_EXCEPTIONS -fexceptions, )
HEXAGON_CXX_FLAGS=$(if $(WITH_HEXAGON), -DWITH_HEXAGON, )
INTROSPECTION_CXX_FLAGS=$(if $(WITH_INTROSPECTION), -DWITH_INTROSPECTION, )
METAL_CXX_FLAGS=$(if $(WITH_METAL), -DWITH_METAL, )
MIPS_CXX_FLAGS=$(if $(WITH_MIPS), -DWITH_MIPS, )
OPENCL_CXX_FLAGS=$(if $(WITH_OPENCL), -DWITH_OPENCL, )
OPENGL_CXX_FLAGS=$(if $(WITH_OPENGL), -DWITH_OPENGL, )
POWERPC_CXX_FLAGS=$(if $(WITH_POWERPC), -DWITH_POWERPC, )
PTX_CXX_FLAGS=$(if $(WITH_NVPTX), -DWITH_NVPTX, )
RISCV_CXX_FLAGS=$(if $(WITH_RISCV), -DWITH_RISCV, )
X86_CXX_FLAGS=$(if $(WITH_X86), -DWITH_X86, )

# --- llvm-config components ------------------------------------------------
# (D3D12, Metal and OpenCL evaluate to nothing in either case.)
AARCH64_LLVM_CONFIG_LIB=$(if $(WITH_AARCH64), aarch64, )
AMDGPU_LLVM_CONFIG_LIB=$(if $(WITH_AMDGPU), amdgpu, )
# TODO add bitcode files
ARM_LLVM_CONFIG_LIB=$(if $(WITH_ARM), arm, )
D3D12_LLVM_CONFIG_LIB=$(if $(WITH_D3D12), , )
HEXAGON_LLVM_CONFIG_LIB=$(if $(WITH_HEXAGON), hexagon, )
METAL_LLVM_CONFIG_LIB=$(if $(WITH_METAL), , )
MIPS_LLVM_CONFIG_LIB=$(if $(WITH_MIPS), mips, )
OPENCL_LLVM_CONFIG_LIB=$(if $(WITH_OPENCL), , )
POWERPC_LLVM_CONFIG_LIB=$(if $(WITH_POWERPC), powerpc, )
PTX_LLVM_CONFIG_LIB=$(if $(WITH_NVPTX), nvptx, )
RISCV_LLVM_CONFIG_LIB=$(if $(WITH_RISCV), riscv, )
X86_LLVM_CONFIG_LIB=$(if $(WITH_X86), x86, )

# libdevice bitcode files that go with the NVPTX backend.
PTX_DEVICE_INITIAL_MODULES=$(if $(WITH_NVPTX), libdevice.compute_20.10.bc libdevice.compute_30.10.bc libdevice.compute_35.10.bc, )

# --- RTTI ------------------------------------------------------------------
# RTTI defaults to on unless LLVM's own flags contain -fno-rtti; when it is
# off, -fno-rtti is passed along.
LLVM_HAS_NO_RTTI = $(findstring -fno-rtti, $(LLVM_CXX_FLAGS))
WITH_RTTI ?= $(if $(LLVM_HAS_NO_RTTI),, not-empty)
RTTI_CXX_FLAGS=$(if $(WITH_RTTI), , -fno-rtti )
178
# First line of `$(CXX) --version`, used to tell g++ from clang.
# Simply expanded: it is consulted by several conditionals, and a recursive
# definition would start the compiler again for each of them.
CXX_VERSION := $(shell $(CXX) --version | head -n1)
CXX_WARNING_FLAGS = -Wall -Werror -Wno-unused-function -Wcast-qual -Wignored-qualifiers -Wno-comment -Wsign-compare -Wno-unknown-warning-option -Wno-psabi

# g++ only: add -Wsuggest-override when the compiler is version 5.1 or newer.
ifneq (,$(findstring g++,$(CXX_VERSION)))
GCC_MAJOR_VERSION := $(shell $(CXX) -dumpfullversion -dumpversion | cut -f1 -d.)
GCC_MINOR_VERSION := $(shell $(CXX) -dumpfullversion -dumpversion | cut -f2 -d.)
ifeq (1,$(shell expr $(GCC_MAJOR_VERSION) \> 5 \| $(GCC_MAJOR_VERSION) = 5 \& $(GCC_MINOR_VERSION) \>= 1))
CXX_WARNING_FLAGS += -Wsuggest-override
endif
endif

# clang only: remember whether LLVM's flags select libc++.
ifneq (,$(findstring clang,$(CXX_VERSION)))
LLVM_CXX_FLAGS_LIBCPP := $(findstring -stdlib=libc++, $(LLVM_CXX_FLAGS))
endif
192
# Flags used to compile libHalide itself: user CXXFLAGS, warnings, RTTI
# setting, PIC, optimisation level, then LLVM's flags and one -DWITH_* group
# per enabled backend/feature.
#
# The final -funwind-tables is required on some hosts like
# powerpc64le-linux-gnu because we may build everything with -fno-exceptions.
# Without -funwind-tables, libHalide.so fails to propagate exceptions and
# causes a test failure.
CXX_FLAGS = $(CXXFLAGS) $(CXX_WARNING_FLAGS) $(RTTI_CXX_FLAGS) -Woverloaded-virtual $(FPIC) $(OPTIMIZE) -fno-omit-frame-pointer -DCOMPILING_HALIDE \
            $(LLVM_CXX_FLAGS) \
            $(PTX_CXX_FLAGS) \
            $(ARM_CXX_FLAGS) \
            $(HEXAGON_CXX_FLAGS) \
            $(AARCH64_CXX_FLAGS) \
            $(X86_CXX_FLAGS) \
            $(OPENCL_CXX_FLAGS) \
            $(METAL_CXX_FLAGS) \
            $(OPENGL_CXX_FLAGS) \
            $(D3D12_CXX_FLAGS) \
            $(MIPS_CXX_FLAGS) \
            $(POWERPC_CXX_FLAGS) \
            $(INTROSPECTION_CXX_FLAGS) \
            $(EXCEPTIONS_CXX_FLAGS) \
            $(AMDGPU_CXX_FLAGS) \
            $(RISCV_CXX_FLAGS) \
            -funwind-tables
216
# Debugging aid: `make print-FOO` prints "FOO=<expanded value of FOO>".
print-%: ; @echo '$*=$($*)'
219
# llvm-config components whose static libraries libHalide links against:
# a fixed core set followed by one entry per enabled backend.
# NOTE(review): WEBASSEMBLY_LLVM_CONFIG_LIB is not defined in the part of the
# file visible here; confirm it is defined elsewhere, otherwise it expands to
# nothing.
LLVM_STATIC_LIBFILES = \
	bitwriter bitreader linker ipo passes mcjit \
	$(X86_LLVM_CONFIG_LIB) $(ARM_LLVM_CONFIG_LIB) \
	$(OPENCL_LLVM_CONFIG_LIB) $(METAL_LLVM_CONFIG_LIB) \
	$(PTX_LLVM_CONFIG_LIB) $(AARCH64_LLVM_CONFIG_LIB) \
	$(MIPS_LLVM_CONFIG_LIB) $(POWERPC_LLVM_CONFIG_LIB) \
	$(HEXAGON_LLVM_CONFIG_LIB) $(AMDGPU_LLVM_CONFIG_LIB) \
	$(WEBASSEMBLY_LLVM_CONFIG_LIB) $(RISCV_LLVM_CONFIG_LIB)

# Link line for those components; the sed filter turns backslashes into
# forward slashes and "X:" into "/X".
LLVM_STATIC_LIBS = -L $(LLVM_LIBDIR) $(shell $(LLVM_CONFIG) --link-static --libfiles $(LLVM_STATIC_LIBFILES) | sed -e 's/\\/\//g' -e 's/\([a-zA-Z]\):/\/\1/g')
241
# Add a rpath to the llvm used for linking, in case multiple llvms are
# installed. Bakes a path on the build system into the .so, so don't
# use this config for distributions.
# The rpath is passed as "-rpath,<dir>" (two linker arguments) because the
# "-rpath=<dir>" spelling is only understood by GNU-style linkers.
LLVM_SHARED_LIBS = -Wl,-rpath,$(LLVM_LIBDIR) -L $(LLVM_LIBDIR) -lLLVM

# LLVM is linked statically into the shared libHalide when
# WITH_LLVM_INSIDE_SHARED_LIBHALIDE is non-empty, dynamically otherwise.
LLVM_LIBS_FOR_SHARED_LIBHALIDE=$(if $(WITH_LLVM_INSIDE_SHARED_LIBHALIDE),$(LLVM_STATIC_LIBS),$(LLVM_SHARED_LIBS))
248
# Flags for compiling the tutorials.
TUTORIAL_CXX_FLAGS ?= -std=c++11 -g -fno-omit-frame-pointer $(RTTI_CXX_FLAGS) -I $(ROOT_DIR)/tools $(SANITIZER_FLAGS) $(LLVM_CXX_FLAGS_LIBCPP)

# Tests use the tutorial flags plus our warning flags. (The tutorials contain
# example code with warnings that must not become errors, so the warning
# flags are added only here.)
# Tests may also use, via conditional compilation, everything the build
# machine's CPU offers: they get -march=native. This presumes tests run on
# the machine they were compiled on.
ARCH_FOR_TESTS ?= native
TEST_CXX_FLAGS ?= $(TUTORIAL_CXX_FLAGS) $(CXX_WARNING_FLAGS) -march=$(ARCH_FOR_TESTS)
TEST_LD_FLAGS = -L$(BIN_DIR) -lHalide $(COMMON_LD_FLAGS)

# In the tests, some of our expectations change depending on the llvm version
TEST_CXX_FLAGS += -DLLVM_VERSION=$(LLVM_VERSION_TIMES_10)

# gcc 4.8 fires a bogus warning on old versions of png.h
ifneq (,$(and $(findstring g++,$(CXX_VERSION)),$(findstring 4.8,$(CXX_VERSION))))
TEST_CXX_FLAGS += -Wno-literal-suffix
endif
269
# On Linux, export the test executable's symbols and add the bin directory
# to the run-time search path.
ifeq ($(UNAME), Linux)
TEST_LD_FLAGS += -rdynamic -Wl,--rpath=$(CURDIR)/$(BIN_DIR)
endif

# When LLVM is not linked into the shared libHalide, add LLVM's library
# directory to the run-time search path. This branch is not restricted to
# Linux, so it uses the "-rpath,<dir>" form rather than the GNU-only
# "--rpath=<dir>".
ifeq ($(WITH_LLVM_INSIDE_SHARED_LIBHALIDE), )
TEST_LD_FLAGS += -Wl,-rpath,$(LLVM_LIBDIR)
endif
277
# Decide which GPU APIs the tests exercise: the backend must be enabled
# (WITH_*) and HL_TARGET must mention the API.

# CUDA: HL_TARGET contains "ptx" or "cuda".
ifneq (,$(WITH_NVPTX))
ifneq (,$(findstring ptx,$(HL_TARGET))$(findstring cuda,$(HL_TARGET)))
TEST_CUDA = 1
endif
endif

# OpenCL: HL_TARGET contains "opencl".
ifneq (,$(and $(WITH_OPENCL),$(findstring opencl,$(HL_TARGET))))
TEST_OPENCL = 1
endif

# Metal: HL_TARGET contains "metal".
ifneq (,$(and $(WITH_METAL),$(findstring metal,$(HL_TARGET))))
TEST_METAL = 1
endif
298
# Host OS name plus the per-OS link flags for the GPU APIs under test.
# Every *_LD_FLAGS default can be overridden by the user.

ifeq ($(UNAME), Linux)
HOST_OS=linux
OPENGL_LD_FLAGS ?= -lGL
ifneq (,$(TEST_CUDA))
CUDA_LD_FLAGS ?= -L/usr/lib/nvidia-current -lcuda
endif
ifneq (,$(TEST_OPENCL))
OPENCL_LD_FLAGS ?= -lOpenCL
endif
endif

ifeq ($(UNAME), Darwin)
HOST_OS=os_x
OPENGL_LD_FLAGS ?= -framework OpenGL
# Someone with an osx box with cuda installed please fix the line below
ifneq (,$(TEST_CUDA))
CUDA_LD_FLAGS ?= -L/usr/local/cuda/lib -lcuda
endif
ifneq (,$(TEST_OPENCL))
OPENCL_LD_FLAGS ?= -framework OpenCL
endif
ifneq (,$(TEST_METAL))
METAL_LD_FLAGS ?= -framework Metal -framework Foundation
endif
endif
324
# Tell the test sources which GPU APIs are being exercised.
ifneq (,$(TEST_OPENCL))
TEST_CXX_FLAGS += -DTEST_OPENCL
endif

ifneq (,$(TEST_METAL))
TEST_CXX_FLAGS += -DTEST_METAL
endif

# CUDA tests additionally get /usr/local/cuda/include on the include path.
ifneq (,$(TEST_CUDA))
TEST_CXX_FLAGS += -DTEST_CUDA -I/usr/local/cuda/include
endif
337
# Compiling the tutorials requires libpng; ask libpng-config for its flags.
LIBPNG_LIBS_DEFAULT = $(shell libpng-config --ldflags)
LIBPNG_CXX_FLAGS ?= $(shell libpng-config --cflags)

# Workaround for libpng-config pointing to 64-bit versions on linux even when
# we're building for 32-bit: if CXX carries -m32 and the reported flags
# mention x86_64, fall back to a plain -lpng.
ifneq (,$(and $(findstring -m32,$(CXX)),$(findstring x86_64,$(LIBPNG_LIBS_DEFAULT))))
LIBPNG_LIBS ?= -lpng
endif
LIBPNG_LIBS ?= $(LIBPNG_LIBS_DEFAULT)

# Workaround brew Cellar path for libpng-config output.
LIBJPEG_LINKER_PATH ?= $(shell echo $(LIBPNG_LIBS_DEFAULT) | sed -e'/-L.*[/][Cc]ellar[/]libpng/!d;s=\(.*\)/[Cc]ellar/libpng/.*=\1/lib=')
LIBJPEG_LIBS ?= $(LIBJPEG_LINKER_PATH) -ljpeg

# There's no libjpeg-config, unfortunately. We should look for
# jpeglib.h one directory level up from png.h . Also handle
# Mac OS brew installs where libpng-config returns paths
# into the PNG cellar.
LIBPNG_INCLUDE_DIRS = $(filter -I%,$(LIBPNG_CXX_FLAGS))
LIBJPEG_CXX_FLAGS ?= $(shell echo $(LIBPNG_INCLUDE_DIRS) | sed -e'/[Cc]ellar[/]libpng/!s=\(.*\)=\1/..=;s=\(.*\)/[Cc]ellar/libpng/.*=\1/include=')

# Combined flags for code that reads/writes both PNG and JPEG.
IMAGE_IO_CXX_FLAGS = $(LIBPNG_CXX_FLAGS) $(LIBJPEG_CXX_FLAGS)
IMAGE_IO_LIBS = $(LIBPNG_LIBS) $(LIBJPEG_LIBS)
362
# We're building into the current directory $(CURDIR). Find the Halide
# repo root directory (the location of the makefile).
# Both values are computed once, here: MAKEFILE_LIST grows with every later
# include, and $(dir ...) replaces a `dirname` process that would otherwise
# be started on every reference to ROOT_DIR or SRC_DIR.
THIS_MAKEFILE := $(realpath $(filter %Makefile, $(MAKEFILE_LIST)))
# $(dir) keeps the trailing slash; strip it to match what dirname printed.
ROOT_DIR := $(patsubst %/,%,$(dir $(THIS_MAKEFILE)))
SRC_DIR  = $(ROOT_DIR)/src

# Target string used to name the per-target output directory ("host" when
# HL_TARGET is empty).
TARGET=$(if $(HL_TARGET),$(HL_TARGET),host)
370
# Output locations. The following directories are all relative to the output
# directory (i.e. $(CURDIR), not $(SRC_DIR)).
LIB_DIR = lib
BIN_DIR = bin
DISTRIB_DIR = distrib
INCLUDE_DIR = include
SHARE_DIR = share
DOC_DIR = $(SHARE_DIR)/doc/Halide
BUILD_DIR = $(BIN_DIR)/build
FILTERS_DIR = $(BIN_DIR)/$(TARGET)/build
TMP_DIR = $(BUILD_DIR)/tmp

# Hexagon runtime binaries, listed relative to HEXAGON_RUNTIME_LIBS_DIR.
HEXAGON_RUNTIME_LIBS_DIR = src/runtime/hexagon_remote/bin
HEXAGON_RUNTIME_LIBS = $(addprefix $(HEXAGON_RUNTIME_LIBS_DIR)/, \
  arm-32-android/libhalide_hexagon_host.so \
  arm-64-android/libhalide_hexagon_host.so \
  host/libhalide_hexagon_host.so \
  v62/hexagon_sim_remote \
  v62/libhalide_hexagon_remote_skel.so \
  v62/signed_by_debug/libhalide_hexagon_remote_skel.so)
389
# libHalide's C++ sources (file names only). Entries are written without
# their extension; .cpp is appended below.
# Keep this list sorted in alphabetical order.
SOURCE_FILES = $(addsuffix .cpp, \
  AddAtomicMutex \
  AddImageChecks \
  AddParameterChecks \
  AlignLoads \
  AllocationBoundsInference \
  ApplySplit \
  Argument \
  AssociativeOpsTable \
  Associativity \
  AsyncProducers \
  AutoSchedule \
  AutoScheduleUtils \
  BoundaryConditions \
  Bounds \
  BoundsInference \
  BoundSmallAllocations \
  Buffer \
  CanonicalizeGPUVars \
  Closure \
  CodeGen_ARM \
  CodeGen_C \
  CodeGen_D3D12Compute_Dev \
  CodeGen_GPU_Dev \
  CodeGen_GPU_Host \
  CodeGen_Hexagon \
  CodeGen_Internal \
  CodeGen_LLVM \
  CodeGen_Metal_Dev \
  CodeGen_MIPS \
  CodeGen_OpenCL_Dev \
  CodeGen_OpenGL_Dev \
  CodeGen_OpenGLCompute_Dev \
  CodeGen_Posix \
  CodeGen_PowerPC \
  CodeGen_PTX_Dev \
  CodeGen_PyTorch \
  CodeGen_RISCV \
  CodeGen_WebAssembly \
  CodeGen_X86 \
  CompilerLogger \
  CPlusPlusMangle \
  CSE \
  Debug \
  DebugArguments \
  DebugToFile \
  Definition \
  Deinterleave \
  Derivative \
  DerivativeUtils \
  DeviceArgument \
  DeviceInterface \
  Dimension \
  EarlyFree \
  Elf \
  EliminateBoolVectors \
  EmulateFloat16Math \
  Error \
  Expr \
  FastIntegerDivide \
  FindCalls \
  Float16 \
  Func \
  Function \
  FuseGPUThreadLoops \
  FuzzFloatStores \
  Generator \
  HexagonOffload \
  HexagonOptimize \
  ImageParam \
  InferArguments \
  InjectHostDevBufferCopies \
  InjectOpenGLIntrinsics \
  Inline \
  InlineReductions \
  IntegerDivisionTable \
  Interval \
  Introspection \
  IR \
  IREquality \
  IRMatch \
  IRMutator \
  IROperator \
  IRPrinter \
  IRVisitor \
  JITModule \
  Lerp \
  LICM \
  LLVM_Output \
  LLVM_Runtime_Linker \
  LoopCarry \
  Lower \
  LowerWarpShuffles \
  MatlabWrapper \
  Memoization \
  Module \
  ModulusRemainder \
  Monotonic \
  ObjectInstanceRegistry \
  OutputImageParam \
  ParallelRVar \
  Parameter \
  ParamMap \
  PartitionLoops \
  Pipeline \
  Prefetch \
  PrintLoopNest \
  Profiling \
  PurifyIndexMath \
  PythonExtensionGen \
  Qualify \
  Random \
  RDom \
  Realization \
  RealizationOrder \
  Reduction \
  RegionCosts \
  RemoveDeadAllocations \
  RemoveExternLoops \
  RemoveUndef \
  Schedule \
  ScheduleFunctions \
  SelectGPUAPI \
  Simplify \
  Simplify_Add \
  Simplify_And \
  Simplify_Call \
  Simplify_Cast \
  Simplify_Div \
  Simplify_EQ \
  Simplify_Exprs \
  Simplify_Let \
  Simplify_LT \
  Simplify_Max \
  Simplify_Min \
  Simplify_Mod \
  Simplify_Mul \
  Simplify_Not \
  Simplify_Or \
  Simplify_Select \
  Simplify_Shuffle \
  Simplify_Stmts \
  Simplify_Sub \
  SimplifyCorrelatedDifferences \
  SimplifySpecializations \
  SkipStages \
  SlidingWindow \
  Solve \
  SplitTuples \
  StmtToHtml \
  StorageFlattening \
  StorageFolding \
  StrictifyFloat \
  Substitute \
  Target \
  Tracing \
  TrimNoOps \
  Tuple \
  Type \
  UnifyDuplicateLets \
  UniquifyVariableNames \
  UnpackBuffers \
  UnrollLoops \
  UnsafePromises \
  Util \
  Var \
  VaryingAttributes \
  VectorizeLoops \
  WasmExecutor \
  WrapCalls)
561
# The externally-visible header files that go into making Halide.h.
# Paths are relative to $(SRC_DIR); the list is handed to build_halide_h in
# this order.
# Don't include anything here that includes llvm headers.
# Keep this list sorted in alphabetical order.
HEADER_FILES = \
  AddAtomicMutex.h \
  AddImageChecks.h \
  AddParameterChecks.h \
  AlignLoads.h \
  AllocationBoundsInference.h \
  ApplySplit.h \
  Argument.h \
  AssociativeOpsTable.h \
  Associativity.h \
  AsyncProducers.h \
  AutoSchedule.h \
  AutoScheduleUtils.h \
  BoundaryConditions.h \
  Bounds.h \
  BoundsInference.h \
  BoundSmallAllocations.h \
  Buffer.h \
  CanonicalizeGPUVars.h \
  Closure.h \
  CodeGen_ARM.h \
  CodeGen_C.h \
  CodeGen_D3D12Compute_Dev.h \
  CodeGen_GPU_Dev.h \
  CodeGen_GPU_Host.h \
  CodeGen_Internal.h \
  CodeGen_LLVM.h \
  CodeGen_Metal_Dev.h \
  CodeGen_MIPS.h \
  CodeGen_OpenCL_Dev.h \
  CodeGen_OpenGL_Dev.h \
  CodeGen_OpenGLCompute_Dev.h \
  CodeGen_Posix.h \
  CodeGen_PowerPC.h \
  CodeGen_PTX_Dev.h \
  CodeGen_PyTorch.h \
  CodeGen_RISCV.h \
  CodeGen_WebAssembly.h \
  CodeGen_X86.h \
  CompilerLogger.h \
  ConciseCasts.h \
  CPlusPlusMangle.h \
  CSE.h \
  Debug.h \
  DebugArguments.h \
  DebugToFile.h \
  Definition.h \
  Deinterleave.h \
  Derivative.h \
  DerivativeUtils.h \
  DeviceAPI.h \
  DeviceArgument.h \
  DeviceInterface.h \
  Dimension.h \
  EarlyFree.h \
  Elf.h \
  EliminateBoolVectors.h \
  EmulateFloat16Math.h \
  Error.h \
  Expr.h \
  ExprUsesVar.h \
  Extern.h \
  ExternFuncArgument.h \
  FastIntegerDivide.h \
  FindCalls.h \
  Float16.h \
  Func.h \
  Function.h \
  FunctionPtr.h \
  FuseGPUThreadLoops.h \
  FuzzFloatStores.h \
  Generator.h \
  HexagonOffload.h \
  HexagonOptimize.h \
  ImageParam.h \
  InferArguments.h \
  InjectHostDevBufferCopies.h \
  InjectOpenGLIntrinsics.h \
  Inline.h \
  InlineReductions.h \
  IntegerDivisionTable.h \
  Interval.h \
  Introspection.h \
  IntrusivePtr.h \
  IR.h \
  IREquality.h \
  IRMatch.h \
  IRMutator.h \
  IROperator.h \
  IRPrinter.h \
  IRVisitor.h \
  JITModule.h \
  Lambda.h \
  Lerp.h \
  LICM.h \
  LLVM_Output.h \
  LLVM_Runtime_Linker.h \
  LoopCarry.h \
  Lower.h \
  LowerWarpShuffles.h \
  MainPage.h \
  MatlabWrapper.h \
  Memoization.h \
  Module.h \
  ModulusRemainder.h \
  Monotonic.h \
  ObjectInstanceRegistry.h \
  OutputImageParam.h \
  ParallelRVar.h \
  Param.h \
  Parameter.h \
  ParamMap.h \
  PartitionLoops.h \
  Pipeline.h \
  Prefetch.h \
  Profiling.h \
  PurifyIndexMath.h \
  PythonExtensionGen.h \
  Qualify.h \
  Random.h \
  RDom.h \
  Realization.h \
  RealizationOrder.h \
  Reduction.h \
  RegionCosts.h \
  RemoveDeadAllocations.h \
  RemoveExternLoops.h \
  RemoveUndef.h \
  runtime/HalideBuffer.h \
  runtime/HalideRuntime.h \
  Schedule.h \
  ScheduleFunctions.h \
  Scope.h \
  SelectGPUAPI.h \
  Simplify.h \
  SimplifyCorrelatedDifferences.h \
  SimplifySpecializations.h \
  SkipStages.h \
  SlidingWindow.h \
  Solve.h \
  SplitTuples.h \
  StmtToHtml.h \
  StorageFlattening.h \
  StorageFolding.h \
  StrictifyFloat.h \
  Substitute.h \
  Target.h \
  ThreadPool.h \
  Tracing.h \
  TrimNoOps.h \
  Tuple.h \
  Type.h \
  UnifyDuplicateLets.h \
  UniquifyVariableNames.h \
  UnpackBuffers.h \
  UnrollLoops.h \
  UnsafePromises.h \
  Util.h \
  Var.h \
  VaryingAttributes.h \
  VectorizeLoops.h \
  WasmExecutor.h \
  WrapCalls.h
728
# Object file in $(BUILD_DIR) for every source; full path in $(SRC_DIR) for
# every public header.
OBJECTS = $(patsubst %.cpp,$(BUILD_DIR)/%.o,$(SOURCE_FILES))
HEADERS = $(patsubst %.h,$(SRC_DIR)/%.h,$(HEADER_FILES))
731
# Runtime modules written in C++ (base names of the .cpp files under
# $(SRC_DIR)/runtime). Each one is built as initmod.<name>_{32,64}[_debug].
# The last entry deliberately has no trailing backslash, so the list does not
# depend on the blank line that follows it.
RUNTIME_CPP_COMPONENTS = \
  aarch64_cpu_features \
  alignment_128 \
  alignment_32 \
  alignment_64 \
  allocation_cache \
  android_clock \
  android_host_cpu_count \
  android_io \
  arm_cpu_features \
  cache \
  can_use_target \
  cuda \
  destructors \
  device_interface \
  errors \
  fake_get_symbol \
  fake_thread_pool \
  float16_t \
  fuchsia_clock \
  fuchsia_host_cpu_count \
  fuchsia_yield \
  gpu_device_selection \
  halide_buffer_t \
  hexagon_cache_allocator \
  hexagon_cpu_features \
  hexagon_dma \
  hexagon_dma_pool \
  hexagon_host \
  ios_io \
  linux_clock \
  linux_host_cpu_count \
  linux_yield \
  matlab \
  metadata \
  metal \
  metal_objc_arm \
  metal_objc_x86 \
  mips_cpu_features \
  module_aot_ref_count \
  module_jit_ref_count \
  msan \
  msan_stubs \
  opencl \
  opengl \
  openglcompute \
  opengl_egl_context \
  opengl_glx_context \
  osx_clock \
  osx_get_symbol \
  osx_host_cpu_count \
  osx_opengl_context \
  osx_yield \
  posix_abort \
  posix_allocator \
  posix_clock \
  posix_error_handler \
  posix_get_symbol \
  posix_io \
  posix_print \
  posix_threads \
  posix_threads_tsan \
  powerpc_cpu_features \
  prefetch \
  profiler \
  profiler_inlined \
  pseudostack \
  qurt_allocator \
  qurt_hvx \
  qurt_hvx_vtcm \
  qurt_init_fini \
  qurt_threads \
  qurt_threads_tsan \
  qurt_yield \
  riscv_cpu_features \
  runtime_api \
  ssp \
  to_string \
  trace_helper \
  tracing \
  wasm_cpu_features \
  windows_abort \
  windows_clock \
  windows_cuda \
  windows_d3d12compute_x86 \
  windows_get_symbol \
  windows_io \
  windows_opencl \
  windows_profiler \
  windows_threads \
  windows_threads_tsan \
  windows_yield \
  write_debug_image \
  x86_cpu_features
826
# Runtime modules built as initmod.<name>_ll.o.
RUNTIME_LL_COMPONENTS = \
  aarch64 arm arm_no_neon \
  hvx_64 hvx_128 \
  mips \
  posix_math \
  powerpc \
  ptx_dev \
  wasm_math win32_math \
  x86 x86_avx x86_avx2 x86_sse41
843
# Runtime headers exported into $(INCLUDE_DIR). Entries are written without
# directory or extension; both are added below.
RUNTIME_EXPORTED_INCLUDES = $(patsubst %,$(INCLUDE_DIR)/%.h, \
    HalideRuntime \
    HalideRuntimeD3D12Compute \
    HalideRuntimeCuda \
    HalideRuntimeHexagonDma \
    HalideRuntimeHexagonHost \
    HalideRuntimeOpenCL \
    HalideRuntimeOpenGL \
    HalideRuntimeOpenGLCompute \
    HalideRuntimeMetal \
    HalideRuntimeQurt \
    HalideBuffer \
    HalidePyTorchHelpers \
    HalidePyTorchCudaHelpers)
857
# Every runtime object that is linked into libHalide, in this order:
#   - each C++ component in its 32, 64, 32_debug and 64_debug variants
#   - one object per exported runtime header
#   - initmod.inlined_c.o
#   - the .ll components
#   - the PTX libdevice bitcode modules
INITIAL_MODULES = \
  $(foreach variant,32 64 32_debug 64_debug,$(patsubst %,$(BUILD_DIR)/initmod.%_$(variant).o,$(RUNTIME_CPP_COMPONENTS))) \
  $(patsubst $(INCLUDE_DIR)/%.h,$(BUILD_DIR)/initmod.%_h.o,$(RUNTIME_EXPORTED_INCLUDES)) \
  $(BUILD_DIR)/initmod.inlined_c.o \
  $(patsubst %,$(BUILD_DIR)/initmod.%_ll.o,$(RUNTIME_LL_COMPONENTS)) \
  $(patsubst libdevice.%.bc,$(BUILD_DIR)/initmod_ptx.%_ll.o,$(PTX_DEVICE_INITIAL_MODULES))
866
# Add the Hexagon simulator to the rpath on Linux. (Not supported elsewhere,
# so no else cases.) Applies only when the Hexagon backend is enabled and
# HL_HEXAGON_TOOLS is set.
ifneq (,$(and $(filter Linux,$(UNAME)),$(WITH_HEXAGON),$(HL_HEXAGON_TOOLS)))
TEST_LD_FLAGS += -Wl,--rpath=$(ROOT_DIR)/src/runtime/hexagon_remote/bin/host
TEST_LD_FLAGS += -Wl,--rpath=$(HL_HEXAGON_TOOLS)/lib/iss
endif
876
# Default goal: the distribution plus the internal test suite.
.PHONY: all
all: distrib test_internal

# Linker flags that write a linker map file to
# $(BUILD_DIR)/llvm_objects/list.all. The spelling depends on the linker:
# the -Map=<file> form by default, the "-map <file>" form on Darwin.
MAP_FLAGS= -Wl,-Map=$(BUILD_DIR)/llvm_objects/list.all
ifeq ($(UNAME), Darwin)
    MAP_FLAGS= -Wl,-map -Wl,$(BUILD_DIR)/llvm_objects/list.all
endif
888
# Work out which object files inside the static LLVM archives libHalide
# actually needs, and extract them next to the list as llvm_<n>_<name>.o.
#
#   1. Do a throw-away link with a linker map ($(MAP_FLAGS) -> list.all).
#   2. Reduce the map to "<archive> <member>" pairs for libLLVM*.a -> list.new.
#   3. If that equals the existing list, just touch it; otherwise delete the
#      old extracted objects, re-extract every member with `ar x` (renaming
#      it with its line number as prefix) and install the new list.
#
# The shell block aborts if the `cd` fails, so the `rm -f llvm_*.o*` below can
# never run in some other directory.
$(BUILD_DIR)/llvm_objects/list: $(OBJECTS) $(INITIAL_MODULES)
	# Determine the relevant object files from llvm with a dummy
	# compilation. Passing -map to the linker gets it to list, as
	# part of the linker map file, the object files in which archives it uses to
	# resolve symbols. We only care about the libLLVM ones, which we will filter below.
	@mkdir -p $(@D)
	$(CXX) -o /dev/null -shared $(MAP_FLAGS) $(OBJECTS) $(INITIAL_MODULES) $(LLVM_STATIC_LIBS) $(LLVM_SYSTEM_LIBS) $(COMMON_LD_FLAGS) > /dev/null
	# if the list has changed since the previous build, or there
	# is no list from a previous build, then delete any old object
	# files and re-extract the required object files
	cd $(BUILD_DIR)/llvm_objects || exit 1; \
	cat list.all | LANG=C sed -n 's/^[^\/]*\(\/[^ ()]*libLLVM.*[.]a\)[^a-zA-Z]*\([^ ()]*[.]o\).*$$/\1 \2/p' | sort | uniq > list.new; \
	rm list.all; \
	if cmp -s list.new list; \
	then \
	echo "No changes in LLVM deps"; \
	touch list; \
	else \
	rm -f llvm_*.o*; \
	cat list.new | sed = | sed "N;s/\n /\n/;s/\([0-9]*\)\n\([^ ]*\) \([^ ]*\)/ar x \2 \3; mv \3 llvm_\1_\3/" | bash - ; \
	mv list.new list; \
	fi
911
# Static libHalide: Halide's own objects, the runtime modules and the LLVM
# objects extracted by the llvm_objects/list rule. The archive is removed
# first so that it is always rebuilt from scratch.
$(LIB_DIR)/libHalide.a: $(OBJECTS) $(INITIAL_MODULES) $(BUILD_DIR)/llvm_objects/list
	# Archive together all the halide and llvm object files
	@mkdir -p $(@D)
	@rm -f $@
	ar q $@ $(OBJECTS) $(INITIAL_MODULES) $(BUILD_DIR)/llvm_objects/llvm_*.o*
	ranlib $@
918
# On Linux the shared library gets an explicit soname; elsewhere no extra
# flag is passed.
LIBHALIDE_SONAME_FLAGS=
ifeq ($(UNAME), Linux)
LIBHALIDE_SONAME_FLAGS=-Wl,-soname,libHalide.so
endif

# Shared libHalide. On Darwin its install name is then rewritten to the
# absolute path of the freshly built library.
$(BIN_DIR)/libHalide.$(SHARED_EXT): $(OBJECTS) $(INITIAL_MODULES)
	@mkdir -p $(@D)
	$(CXX) -shared $(OBJECTS) $(INITIAL_MODULES) $(LLVM_LIBS_FOR_SHARED_LIBHALIDE) $(LLVM_SYSTEM_LIBS) $(COMMON_LD_FLAGS) $(INSTALL_NAME_TOOL_LD_FLAGS) $(LIBHALIDE_SONAME_FLAGS) -o $@
ifeq ($(UNAME), Darwin)
	install_name_tool -id $(CURDIR)/$@ $@
endif
931
# Generate the amalgamated public header from the licence text and all public
# headers, then precompile it twice (default and build-time optimisation
# levels).
# The generator writes to a temporary file that is renamed into place only on
# success; a failing build_halide_h therefore cannot leave behind a truncated
# Halide.h that looks up to date.
$(INCLUDE_DIR)/Halide.h: $(SRC_DIR)/../LICENSE.txt $(HEADERS) $(BIN_DIR)/build_halide_h
	@mkdir -p $(@D)
	$(BIN_DIR)/build_halide_h $(SRC_DIR)/../LICENSE.txt $(HEADERS) > $@.tmp
	mv -f $@.tmp $@
	# Also generate a precompiled version in the same folder so that anything compiled with a compatible set of flags can use it
	@mkdir -p $(INCLUDE_DIR)/Halide.h.gch
	$(CXX) -std=c++11 $(TEST_CXX_FLAGS) -I$(ROOT_DIR) $(OPTIMIZE) -x c++-header $(INCLUDE_DIR)/Halide.h -o $(INCLUDE_DIR)/Halide.h.gch/Halide.default.gch
	$(CXX) -std=c++11 $(TEST_CXX_FLAGS) -I$(ROOT_DIR) $(OPTIMIZE_FOR_BUILD_TIME) -x c++-header $(INCLUDE_DIR)/Halide.h -o $(INCLUDE_DIR)/Halide.h.gch/Halide.test.gch
939
# Copy the public runtime headers from $(SRC_DIR)/runtime into
# $(INCLUDE_DIR), keeping their file names.

# Any HalideRuntime* file.
$(INCLUDE_DIR)/HalideRuntime%: $(SRC_DIR)/runtime/HalideRuntime%
	echo Copying $<
	@mkdir -p $(@D)
	cp $< $(@D)/

# The remaining exported headers, handled by one static pattern rule.
$(INCLUDE_DIR)/HalideBuffer.h \
$(INCLUDE_DIR)/HalidePyTorchHelpers.h \
$(INCLUDE_DIR)/HalidePyTorchCudaHelpers.h: $(INCLUDE_DIR)/%.h: $(SRC_DIR)/runtime/%.h
	echo Copying $<
	@mkdir -p $(@D)
	cp $< $(@D)/
959
# Build the helper tool that generates Halide.h.
# `mkdir -p` already succeeds when the directory exists, so its exit status
# is no longer ignored: a real failure stops the recipe right here.
$(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp
	@mkdir -p $(@D)
	$(CXX) -std=c++11 $< -o $@
963
# Pull in the generated dependency files, if present, for the library objects
# and the runtime modules.
-include $(OBJECTS:.o=.d) $(INITIAL_MODULES:.o=.d)

# Target triples for the generic 32- and 64-bit runtime modules.
# (The 'nacl' is a red herring. This is just a generic 32-bit little-endian target.)
RUNTIME_TRIPLE_32 = "le32-unknown-nacl-unknown"
RUNTIME_TRIPLE_64 = "le64-unknown-unknown-unknown"

# windows-specific modules use the __stdcall calling convention
RUNTIME_TRIPLE_WIN_32 = "i386-unknown-unknown-unknown"
RUNTIME_TRIPLE_WIN_64 = "x86_64-unknown-windows-unknown"

# Flags shared by every runtime module compile.
# -std=gnu++98 is deliberate; we do NOT want c++11 here,
# as we don't want static locals to get thread synchronization stuff.
RUNTIME_CXX_FLAGS = -O3 -fno-vectorize -ffreestanding -fno-blocks \
                    -fno-exceptions -fno-unwind-tables -std=gnu++98
979
# 32-bit Windows runtime modules: compile src/runtime/windows_<name>.cpp to
# textual LLVM IR and emit a matching .d dependency file next to the output.
# -fpic is intentionally not passed, in line with the other Windows runtime
# rules (64-bit and both debug variants); only the generic non-debug rules
# request position-independent code.
$(BUILD_DIR)/initmod.windows_%_32.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WIN_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)
983
# Non-debug runtime modules -> textual LLVM IR.
# In each rule $< is the runtime source file and $(@:.ll=.d) is the dependency
# file written alongside the .ll output.

# 64-bit Windows modules (src/runtime/windows_<name>.cpp).
$(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# Generic 64-bit modules.
$(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# Generic 32-bit modules.
$(BUILD_DIR)/initmod.%_32.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)
995
# Debug build of the 64-bit Windows runtime modules.
# This must be a genuine 64-bit compile (-m64, BITS_64) to agree with the
# 64-bit Windows triple, and its dependency file must carry the _64_debug name
# so it does not overwrite the one produced by the 32-bit debug rule.
$(BUILD_DIR)/initmod.windows_%_64_debug.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)
999
# Debug (-g -DDEBUG_RUNTIME) runtime modules -> textual LLVM IR.
# In each rule $< is the runtime source file and $(@:.ll=.d) is the dependency
# file written alongside the .ll output.

# Generic 64-bit debug modules.
$(BUILD_DIR)/initmod.%_64_debug.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# 32-bit Windows debug modules (src/runtime/windows_<name>.cpp).
$(BUILD_DIR)/initmod.windows_%_32_debug.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WIN_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)

# Generic 32-bit debug modules (note the extra explicit -O3 ahead of the shared flags).
$(BUILD_DIR)/initmod.%_32_debug.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME -O3 $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $< -o $@ -MMD -MP -MF $(@:.ll=.d)
1011
# Runtime modules that are already LLVM IR are copied into the build tree
# under the initmod.<name>_ll.ll naming scheme.
$(BUILD_DIR)/initmod.%_ll.ll: $(SRC_DIR)/runtime/%.ll
	@mkdir -p $(@D)
	cp $< $@

# Assemble textual IR into bitcode.
$(BUILD_DIR)/initmod.%.bc: $(BUILD_DIR)/initmod.%.ll $(BUILD_DIR)/llvm_ok
	$(LLVM_AS) $< -o $@

# binary2cpp reads a blob on stdin and writes a .cpp file to stdout; its
# argument is presumably the name of the emitted symbol (see tools/binary2cpp.cpp).
# $< is the binary2cpp executable in the rules below.
$(BUILD_DIR)/initmod.%.cpp: $(BIN_DIR)/binary2cpp $(BUILD_DIR)/initmod.%.bc
	./$< halide_internal_initmod_$* < $(@:.cpp=.bc) > $@

# Runtime headers are embedded the same way.
$(BUILD_DIR)/initmod.%_h.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/%.h
	./$< halide_internal_runtime_header_$*_h < $(SRC_DIR)/runtime/$*.h > $@

# Any C code in the runtime that must be inlined needs to be copy-pasted into the output for the C backend.
$(BUILD_DIR)/initmod.inlined_c.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/halide_buffer_t.cpp
	./$< halide_internal_initmod_inlined_c < $(SRC_DIR)/runtime/halide_buffer_t.cpp > $@

# Prebuilt NVIDIA libdevice bitcode blobs.
$(BUILD_DIR)/initmod_ptx.%_ll.cpp: $(BIN_DIR)/binary2cpp $(SRC_DIR)/runtime/nvidia_libdevice_bitcode/libdevice.%.bc
	./$< halide_internal_initmod_ptx_$(basename $*)_ll < $(SRC_DIR)/runtime/nvidia_libdevice_bitcode/libdevice.$*.bc > $@

# The embedding tool itself, built with the host compiler.
$(BIN_DIR)/binary2cpp: $(ROOT_DIR)/tools/binary2cpp.cpp
	@mkdir -p $(@D)
	$(CXX) $< -o $@
1035
# Compile the generated initmod sources.
# NOTE(review): the dependency file for these two rules is written to
# $(BUILD_DIR)/<stem>.d and names $(BUILD_DIR)/<stem>.o as its target, i.e.
# without the initmod prefix of the real output. This looks intentional (it
# avoids clobbering the initmod.*.d files written by the .ll rules) - confirm.
$(BUILD_DIR)/initmod_ptx.%_ll.o: $(BUILD_DIR)/initmod_ptx.%_ll.cpp
	$(CXX) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/$*.d -MT $(BUILD_DIR)/$*.o

$(BUILD_DIR)/initmod.%.o: $(BUILD_DIR)/initmod.%.cpp
	$(CXX) -c $< -o $@ -MMD -MP -MF $(BUILD_DIR)/$*.d -MT $(BUILD_DIR)/$*.o

# Regular libHalide sources: each Foo.cpp must have a matching Foo.h.
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(SRC_DIR)/%.h $(BUILD_DIR)/llvm_ok
	@mkdir -p $(@D)
	$(CXX) $(CXX_FLAGS) -c $< -o $@ -MMD -MP -MF $(@:.o=.d) -MT $@

# Simplify_*.cpp files share a single header, Simplify_Internal.h.
$(BUILD_DIR)/Simplify_%.o: $(SRC_DIR)/Simplify_%.cpp $(SRC_DIR)/Simplify_Internal.h $(BUILD_DIR)/llvm_ok
	@mkdir -p $(@D)
	$(CXX) $(CXX_FLAGS) -c $< -o $@ -MMD -MP -MF $(@:.o=.d) -MT $@
1049
# Remove every output directory this Makefile populates, plus the bin
# directories of the apps.
.PHONY: clean
clean:
	rm -rf $(LIB_DIR) $(BIN_DIR) $(BUILD_DIR) $(TMP_DIR) $(FILTERS_DIR) \
		$(INCLUDE_DIR) $(SHARE_DIR) $(DISTRIB_DIR) $(ROOT_DIR)/apps/*/bin

# With no prerequisites, .SECONDARY makes make treat every target as
# secondary, so intermediate files are never deleted automatically.
.SECONDARY:
1063
# Lists of test sources, discovered once while the Makefile is parsed.
# These use ':=' with $(wildcard) rather than a recursively-expanded
# '$(shell ls ...)': the lists are referenced by many rules, and a recursive
# definition would fork 'ls' again on every expansion. $(sort) keeps the order
# deterministic across make versions, and an empty match is simply an empty list.
CORRECTNESS_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/correctness/*.cpp)) $(sort $(wildcard $(ROOT_DIR)/test/correctness/*.c))
PERFORMANCE_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/performance/*.cpp))
ERROR_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/error/*.cpp))
WARNING_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/warning/*.cpp))
OPENGL_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/opengl/*.cpp))
GENERATOR_EXTERNAL_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/generator/*test.cpp))
GENERATOR_EXTERNAL_TEST_GENERATOR := $(sort $(wildcard $(ROOT_DIR)/test/generator/*_generator.cpp))
# Tutorial sources ending in _generate.cpp are excluded from the tutorial list.
TUTORIALS := $(filter-out %_generate.cpp,$(sort $(wildcard $(ROOT_DIR)/tutorial/*.cpp)))
AUTO_SCHEDULE_TESTS := $(sort $(wildcard $(ROOT_DIR)/test/auto_schedule/*.cpp))

# Dependency files written by the opengl test rule (-MMD -MF), if present.
-include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d)
1075
# Aggregate targets: each maps a list of test sources onto the per-test
# targets (<kind>_<name>) and depends on all of them.
test_correctness: \
	$(patsubst $(ROOT_DIR)/test/correctness/%.cpp,quiet_correctness_%,$(CORRECTNESS_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/correctness/%.c,quiet_correctness_%,$(CORRECTNESS_TESTS))
test_performance: $(patsubst $(ROOT_DIR)/test/performance/%.cpp,performance_%,$(PERFORMANCE_TESTS))
test_error: $(patsubst $(ROOT_DIR)/test/error/%.cpp,error_%,$(ERROR_TESTS))
test_warning: $(patsubst $(ROOT_DIR)/test/warning/%.cpp,warning_%,$(WARNING_TESTS))
test_tutorial: $(patsubst $(ROOT_DIR)/tutorial/%.cpp,tutorial_%,$(TUTORIALS))
test_valgrind: $(patsubst $(ROOT_DIR)/test/correctness/%.cpp,valgrind_%,$(CORRECTNESS_TESTS))
test_avx512: $(patsubst $(ROOT_DIR)/test/correctness/%.cpp,avx512_%,$(CORRECTNESS_TESTS))
test_opengl: $(patsubst $(ROOT_DIR)/test/opengl/%.cpp,opengl_%,$(OPENGL_TESTS))
test_auto_schedule: $(patsubst $(ROOT_DIR)/test/auto_schedule/%.cpp,auto_schedule_%,$(AUTO_SCHEDULE_TESTS))

# Convenience alias for the multi-GPU correctness test.
.PHONY: test_correctness_multi_gpu
test_correctness_multi_gpu: correctness_gpu_multi_device
1088
# There are 3 types of tests for generators:
# 1) Externally-written aot-based tests
# 2) Externally-written aot-based tests (compiled using C++ backend)
# 3) Externally-written JIT-based tests
GENERATOR_AOT_TESTS = $(patsubst $(ROOT_DIR)/test/generator/%_aottest.cpp,generator_aot_%,$(GENERATOR_EXTERNAL_TESTS))
GENERATOR_AOTCPP_TESTS = $(patsubst $(ROOT_DIR)/test/generator/%_aottest.cpp,generator_aotcpp_%,$(GENERATOR_EXTERNAL_TESTS))
GENERATOR_JIT_TESTS = $(patsubst $(ROOT_DIR)/test/generator/%_jittest.cpp,generator_jit_%,$(GENERATOR_EXTERNAL_TESTS))

# Note that many of the AOT-CPP tests are broken right now; the tests below are
# removed from the AOT-CPP list, each tagged with the *known* blocking issue(s):
#
#   multitarget
#       doesn't make any sense for the CPP backend; just skip it.
#   acquire_release, define_extern_opencl, gpu_object_lifetime, gpu_only,
#   cleanup_on_error, buffer_copy
#       https://github.com/halide/Halide/issues/2084 (only if opencl enabled)
#   user_context, argvcall, metadata_tester, cxx_mangling
#       https://github.com/halide/Halide/issues/2071
#   msan, memory_profiler_mandelbrot
#       https://github.com/halide/Halide/issues/2075
#   matlab
#       https://github.com/halide/Halide/issues/2082
#   async_parallel
#       https://github.com/halide/Halide/issues/2093
#   stubtest, stubuser
#       https://github.com/halide/Halide/issues/4916
GENERATOR_AOTCPP_TESTS := $(filter-out \
	generator_aotcpp_multitarget \
	generator_aotcpp_acquire_release \
	generator_aotcpp_define_extern_opencl \
	generator_aotcpp_gpu_object_lifetime \
	generator_aotcpp_gpu_only \
	generator_aotcpp_cleanup_on_error \
	generator_aotcpp_buffer_copy \
	generator_aotcpp_user_context \
	generator_aotcpp_argvcall \
	generator_aotcpp_metadata_tester \
	generator_aotcpp_cxx_mangling \
	generator_aotcpp_msan \
	generator_aotcpp_memory_profiler_mandelbrot \
	generator_aotcpp_matlab \
	generator_aotcpp_async_parallel \
	generator_aotcpp_stubtest \
	generator_aotcpp_stubuser, \
	$(GENERATOR_AOTCPP_TESTS))

test_aotcpp_generator: $(GENERATOR_AOTCPP_TESTS)
1151
# This is just a test to ensure that RunGen builds and links for a critical mass of Generators;
# not all will work directly (e.g. due to missing define_externs at link time), so we disable
# those known to be broken for plausible reasons.
GENERATOR_BUILD_RUNGEN_TESTS = $(patsubst $(ROOT_DIR)/test/generator/%_generator.cpp,$(FILTERS_DIR)/%.rungen,$(GENERATOR_EXTERNAL_TEST_GENERATOR))
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(addprefix $(FILTERS_DIR)/,$(addsuffix .rungen, \
	async_parallel \
	cxx_mangling_define_extern \
	define_extern_opencl \
	matlab \
	msan \
	multitarget \
	nested_externs \
	tiled_blur \
	extern_output)),$(GENERATOR_BUILD_RUNGEN_TESTS))
# Hand-written RunGen targets that are not derived from a *_generator.cpp name.
GENERATOR_BUILD_RUNGEN_TESTS += \
	$(FILTERS_DIR)/multi_rungen \
	$(FILTERS_DIR)/multi_rungen2 \
	$(FILTERS_DIR)/rungen_test \
	$(FILTERS_DIR)/registration_test

# Build everything in the list, then run the two executables that are actual tests.
test_rungen: $(GENERATOR_BUILD_RUNGEN_TESTS)
	$(FILTERS_DIR)/rungen_test
	$(FILTERS_DIR)/registration_test
1174
# All generator test flavours, followed by the two runnable RunGen tests.
test_generator: \
	$(GENERATOR_AOT_TESTS) \
	$(GENERATOR_AOTCPP_TESTS) \
	$(GENERATOR_JIT_TESTS) \
	$(GENERATOR_BUILD_RUNGEN_TESTS)
	$(FILTERS_DIR)/rungen_test
	$(FILTERS_DIR)/registration_test

ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning test_generator

# These targets perform timings of each test. For most tests this includes Halide JIT compile times, and run times.
# For generator tests they time the compile time only. The times are recorded in CSV files.
time_compilation_correctness: init_time_compilation_correctness $(patsubst $(ROOT_DIR)/test/correctness/%.cpp,time_compilation_test_%,$(CORRECTNESS_TESTS))
time_compilation_performance: init_time_compilation_performance $(patsubst $(ROOT_DIR)/test/performance/%.cpp,time_compilation_performance_%,$(PERFORMANCE_TESTS))
time_compilation_opengl: init_time_compilation_opengl $(patsubst $(ROOT_DIR)/test/opengl/%.cpp,time_compilation_opengl_%,$(OPENGL_TESTS))
# NOTE(review): GENERATOR_TESTS is not defined in the visible part of this file;
# if it is undefined this list is empty. Confirm whether GENERATOR_EXTERNAL_TESTS was meant.
time_compilation_generator: init_time_compilation_generator $(patsubst $(ROOT_DIR)/test/generator/%_aottest.cpp,time_compilation_generator_%,$(GENERATOR_TESTS))

# Start a fresh compile_times_<kind>.csv containing only the header row.
init_time_compilation_%:
	echo "TEST,User (s),System (s),Real" > compile_times_$*.csv

# Command prefix used for timing; '$@' is expanded where the variable is used,
# so each CSV row is labelled with the target being timed.
TIME_COMPILATION ?= /usr/bin/time -a -f "$@,%U,%S,%E" -o

# Run the regular test suites as prerequisites, then the performance and
# auto-schedule tests through a separate make invocation.
# NOTE(review): the literal 'make' (rather than $(MAKE)) looks deliberate - a
# plain sub-make does not join the parent's jobserver, which would keep the
# timing-sensitive tests from running in parallel. Confirm before changing.
run_tests: $(ALL_TESTS)
	make -f $(THIS_MAKEFILE) test_performance test_auto_schedule
1195
# Build (but do not run) the test executables.
.PHONY: build_tests
build_tests: \
	$(patsubst $(ROOT_DIR)/test/correctness/%.cpp,$(BIN_DIR)/correctness_%,$(CORRECTNESS_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/performance/%.cpp,$(BIN_DIR)/performance_%,$(PERFORMANCE_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/error/%.cpp,$(BIN_DIR)/error_%,$(ERROR_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/warning/%.cpp,$(BIN_DIR)/warning_%,$(WARNING_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/generator/%_aottest.cpp,$(BIN_DIR)/$(TARGET)/generator_aot_%,$(GENERATOR_EXTERNAL_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/generator/%_jittest.cpp,$(BIN_DIR)/generator_jit_%,$(GENERATOR_EXTERNAL_TESTS)) \
	$(patsubst $(ROOT_DIR)/test/auto_schedule/%.cpp,$(BIN_DIR)/auto_schedule_%,$(AUTO_SCHEDULE_TESTS))

# OpenGL doesn't build on every host platform we support (eg. ARM),
# so its tests are only added to build_tests when WITH_OPENGL is non-empty.
.PHONY: build_opengl_tests
build_opengl_tests: $(patsubst $(ROOT_DIR)/test/opengl/%.cpp,$(BIN_DIR)/opengl_%,$(OPENGL_TESTS))

ifneq ($(WITH_OPENGL),)
build_tests: build_opengl_tests
endif
1212
# Remove only the generator-related build products (generator executables,
# standalone runtimes, generated filters, generator tests and their objects).
# Declared phony so a stray file named 'clean_generator' cannot disable it.
.PHONY: clean_generator
clean_generator:
	rm -rf $(BIN_DIR)/*.generator
	rm -rf $(BIN_DIR)/*/runtime.a
	rm -rf $(FILTERS_DIR)
	rm -rf $(BIN_DIR)/*/generator_*
	rm -rf $(BUILD_DIR)/*_generator.o
	rm -f $(BUILD_DIR)/GenGen.o
	rm -f $(BUILD_DIR)/RunGenMain.o

# Aggregate target for the timing runs; it names no file, so it is phony too.
.PHONY: time_compilation_tests
time_compilation_tests: time_compilation_correctness time_compilation_performance time_compilation_generator
1223
# Object for tools/GenGen.cpp; it is linked into every generator executable
# built by this file.
$(BUILD_DIR)/GenGen.o: $(ROOT_DIR)/tools/GenGen.cpp $(INCLUDE_DIR)/Halide.h
	@mkdir -p $(@D)
	$(CXX) -c $< $(TEST_CXX_FLAGS) -I$(INCLUDE_DIR) -o $@

# Make an empty generator for generating runtimes.
# Only GenGen.o ($<) is linked; libHalide comes in through $(TEST_LD_FLAGS).
$(BIN_DIR)/runtime.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT)
	@mkdir -p $(@D)
	$(CXX) $< $(TEST_LD_FLAGS) -o $@

# Generate a standalone runtime for a given target string.
# The stem ($*) is the target string; output goes to this rule's own directory.
$(BIN_DIR)/%/runtime.a: $(BIN_DIR)/runtime.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -r runtime -o $(CURDIR)/$(@D) target=$*

# Internal test executable; it includes headers straight from $(SRC_DIR).
$(BIN_DIR)/test_internal: $(ROOT_DIR)/test/internal.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) $< -I$(SRC_DIR) $(TEST_LD_FLAGS) -o $@
1241
# Correctness tests that link against libHalide
$(BIN_DIR)/correctness_%: $(ROOT_DIR)/test/correctness/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# Correctness tests that do NOT link against libHalide.
# These have no prerequisite that creates $(BIN_DIR), so the recipes create the
# output directory themselves; otherwise building one on a clean tree fails.
# plain_c_includes is compiled as C (-x c) with warnings as errors.
$(BIN_DIR)/correctness_plain_c_includes: $(ROOT_DIR)/test/correctness/plain_c_includes.c $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) -x c -Wall -Werror -I$(ROOT_DIR)/src/runtime $(OPTIMIZE_FOR_BUILD_TIME) $< -o $@

# Note that this test must *not* link in either libHalide, or a Halide runtime;
# this test should be usable without either.
$(BIN_DIR)/correctness_halide_buffer: $(ROOT_DIR)/test/correctness/halide_buffer.cpp $(INCLUDE_DIR)/HalideBuffer.h $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -o $@

# The image_io test additionally needs to link to libpng and
# libjpeg.
$(BIN_DIR)/correctness_image_io: $(ROOT_DIR)/test/correctness/image_io.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1260
# OpenCL runtime correctness test requires runtime.a to be linked.
# $(@D)/runtime.a is the standalone runtime for $(TARGET) listed as a prerequisite.
$(BIN_DIR)/$(TARGET)/correctness_opencl_runtime: $(ROOT_DIR)/test/correctness/opencl_runtime.cpp $(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(@D)/runtime.a $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime \
		$(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# Performance tests are the only ones here compiled with $(OPTIMIZE).
$(BIN_DIR)/performance_%: $(ROOT_DIR)/test/performance/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE) $< -I$(INCLUDE_DIR) \
		-I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common $(TEST_LD_FLAGS) -o $@

# Error tests that link against libHalide
$(BIN_DIR)/error_%: $(ROOT_DIR)/test/error/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) -I$(ROOT_DIR)/src/runtime -I$(ROOT_DIR)/test/common \
		$(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# Warning tests that link against libHalide
$(BIN_DIR)/warning_%: $(ROOT_DIR)/test/warning/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@

# OpenGL tests also need the OpenGL link flags, and they write a dependency
# file that is -include'd where OPENGL_TESTS is defined.
$(BIN_DIR)/opengl_%: $(ROOT_DIR)/test/opengl/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h $(INCLUDE_DIR)/HalideRuntimeOpenGL.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< -I$(INCLUDE_DIR) -I$(SRC_DIR) \
		$(TEST_LD_FLAGS) $(OPENGL_LD_FLAGS) -o $@ -MMD -MF $(BUILD_DIR)/test_opengl_$*.d

# Auto schedule tests that link against libHalide
$(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
	$(CXX) $(TEST_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) -o $@
1282
# TODO(srj): this doesn't auto-delete, why not?
# Likely explanation (confirm): prerequisites of special targets are literal
# names, so the '%' below is not treated as a pattern; in addition this file
# declares a bare '.SECONDARY:', which keeps every intermediate file.
.INTERMEDIATE: $(BIN_DIR)/%.generator

# By default, %.generator is produced by building %_generator.cpp
# Note that the rule includes all _generator.cpp files, so that generator with define_extern
# usage can just add deps later.
$(BUILD_DIR)/%_generator.o: $(ROOT_DIR)/test/generator/%_generator.cpp $(INCLUDE_DIR)/Halide.h
	@mkdir -p $(@D)
	$(CXX) $(TEST_CXX_FLAGS) -I$(INCLUDE_DIR) -I$(CURDIR)/$(FILTERS_DIR) \
		-c $< -o $@

# Link a generator executable. Only .cpp/.o/.a prerequisites are passed to the
# compiler, so libHalide (a shared library) is reached via $(TEST_LD_FLAGS),
# and prerequisites added by other rules are picked up automatically.
$(BIN_DIR)/%.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT) $(BUILD_DIR)/%_generator.o
	@mkdir -p $(@D)
	$(CXX) $(filter %.cpp %.o %.a,$^) \
		$(TEST_LD_FLAGS) -o $@
1296
# It is not always possible to cross compile between 32-bit and 64-bit via the clang build as part of llvm.
# These next two rules can fail the compilation and produce zero length bitcode blobs.
# If the zero length blob is actually used, the test will fail anyway, but usually only the bitness
# of the target is used.
# ($< is external_code_extern.cpp in all three embedding rules below.)
$(BUILD_DIR)/external_code_extern_bitcode_32.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m32 -target $(RUNTIME_TRIPLE_32) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_32.bc \
		|| echo -n > $(BUILD_DIR)/external_code_extern_32.bc
	./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_32 < $(BUILD_DIR)/external_code_extern_32.bc > $@

$(BUILD_DIR)/external_code_extern_bitcode_64.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m64 -target $(RUNTIME_TRIPLE_64) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_64.bc \
		|| echo -n > $(BUILD_DIR)/external_code_extern_64.bc
	./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_64 < $(BUILD_DIR)/external_code_extern_64.bc > $@

# Embed the C++ source text itself (no compilation involved).
$(BUILD_DIR)/external_code_extern_cpp_source.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
	@mkdir -p $(@D)
	./$(BIN_DIR)/binary2cpp external_code_extern_cpp_source < $< > $@

# The external_code generator additionally compiles and links the three
# embedded blobs generated above.
$(BIN_DIR)/external_code.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT) $(BUILD_DIR)/external_code_generator.o $(BUILD_DIR)/external_code_extern_bitcode_32.cpp $(BUILD_DIR)/external_code_extern_bitcode_64.cpp $(BUILD_DIR)/external_code_extern_cpp_source.cpp
	@mkdir -p $(@D)
	$(CXX) $(filter %.cpp %.o %.a,$^) \
		$(TEST_LD_FLAGS) -o $@
1318
NAME_MANGLING_TARGET = $(NON_EMPTY_TARGET)-c_plus_plus_name_mangling

# Output kinds requested from a generator for the AOT tests.
GEN_AOT_OUTPUTS = -e static_library,c_header,c_source,registration

# By default, %.a/.h are produced by executing %.generator. Runtimes are not included in these.
# (We explicitly also generate .cpp output here as well, as additional test surface for the C++ backend.)
# The generator is run through an absolute path and writes into this rule's own directory.
$(FILTERS_DIR)/%.a: $(BIN_DIR)/%.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g $* $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) target=$(TARGET)-no_runtime

# The header, C++ source and registration file are side products of the .a
# rule; these rules only tie them to it in the dependency graph.
$(FILTERS_DIR)/%.h: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

$(FILTERS_DIR)/%.halide_generated.cpp: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

$(FILTERS_DIR)/%.registration.cpp: $(FILTERS_DIR)/%.a
	@echo $@ produced implicitly by $^

# Stub headers come from a separate generator invocation (-e cpp_stub).
$(FILTERS_DIR)/%.stub.h: $(BIN_DIR)/%.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g $* -n $* -o $(CURDIR)/$(@D) -e cpp_stub
1341
# Extern functions used by the cxx_mangling filters.
$(FILTERS_DIR)/cxx_mangling_externs.o: $(ROOT_DIR)/test/generator/cxx_mangling_externs.cpp
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) -c $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) -o $@

# If we want to use a Generator with custom GeneratorParams, we need to write
# custom rules: to pass the GeneratorParams, and to give a unique function and file name.
# After generation, makelib.sh is given the archive (as both of its first two
# arguments) together with the externs object.
$(FILTERS_DIR)/cxx_mangling.a: $(BIN_DIR)/cxx_mangling.generator $(FILTERS_DIR)/cxx_mangling_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling -f "HalideTest::AnotherNamespace::cxx_mangling"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_externs.o

ifneq ($(TEST_CUDA), )
# Also build with a gpu target to ensure that the GPU-Host generation
# code handles name mangling properly. (Note that we don't need to
# run this code, just check for link errors.)
$(FILTERS_DIR)/cxx_mangling_gpu.a: $(BIN_DIR)/cxx_mangling.generator $(FILTERS_DIR)/cxx_mangling_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling-cuda-cuda_capability_30 -f "HalideTest::cxx_mangling_gpu"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_externs.o
endif

# The define_extern externs depend on the generated cxx_mangling.h; headers are
# filtered out of the compile command.
$(FILTERS_DIR)/cxx_mangling_define_extern_externs.o: $(ROOT_DIR)/test/generator/cxx_mangling_define_extern_externs.cpp $(FILTERS_DIR)/cxx_mangling.h
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) -c $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) -o $@

$(FILTERS_DIR)/cxx_mangling_define_extern.a: $(BIN_DIR)/cxx_mangling_define_extern.generator $(FILTERS_DIR)/cxx_mangling_define_extern_externs.o
	@mkdir -p $(@D)
	$(CURDIR)/$< -g cxx_mangling_define_extern $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-c_plus_plus_name_mangling-user_context -f "HalideTest::cxx_mangling_define_extern"
	$(ROOT_DIR)/tools/makelib.sh $@ $@ $(FILTERS_DIR)/cxx_mangling_define_extern_externs.o
1371
# Filters that need something beyond the default %.a rule. Each runs its
# generator via an absolute path and writes into this rule's own directory.

# pyramid needs a custom arg.
$(FILTERS_DIR)/pyramid.a: $(BIN_DIR)/pyramid.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g pyramid -f pyramid $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime levels=10

# string_param is given an expression string through rpn_expr.
$(FILTERS_DIR)/string_param.a: $(BIN_DIR)/string_param.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g string_param -f string_param $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime rpn_expr="5 y * x +"

# memory_profiler_mandelbrot need profiler set
$(FILTERS_DIR)/memory_profiler_mandelbrot.a: $(BIN_DIR)/memory_profiler_mandelbrot.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g memory_profiler_mandelbrot -f memory_profiler_mandelbrot $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-profile

# alias_with_offset_42 is a second generator name served by alias.generator.
$(FILTERS_DIR)/alias_with_offset_42.a: $(BIN_DIR)/alias.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g alias_with_offset_42 -f alias_with_offset_42 $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime
1389
# Generator arguments shared by both metadata_tester builds; settings for the
# same parameter are kept together on one line.
METADATA_TESTER_GENERATOR_ARGS = \
	input.type=uint8 input.dim=3 \
	dim_only_input_buffer.type=uint8 \
	untyped_input_buffer.type=uint8 untyped_input_buffer.dim=3 \
	output.type=float32,float32 output.dim=3 \
	input_not_nod.type=uint8 input_not_nod.dim=3 \
	input_nod.dim=3 \
	input_not.type=uint8 \
	array_input.size=2 \
	array_i8.size=2 array_i16.size=2 array_i32.size=2 \
	array_h.size=2 \
	buffer_array_input2.dim=3 \
	buffer_array_input3.type=float32 \
	buffer_array_input4.dim=3 buffer_array_input4.type=float32 \
	buffer_array_input5.size=2 \
	buffer_array_input6.size=2 buffer_array_input6.dim=3 \
	buffer_array_input7.size=2 buffer_array_input7.type=float32 \
	buffer_array_input8.size=2 buffer_array_input8.dim=3 buffer_array_input8.type=float32 \
	buffer_f16_untyped.type=float16 \
	array_outputs.size=2 \
	array_outputs7.size=2 array_outputs8.size=2 array_outputs9.size=2

# metadata_tester is built with and without user-context
$(FILTERS_DIR)/metadata_tester.a: $(BIN_DIR)/metadata_tester.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g metadata_tester -f metadata_tester $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime $(METADATA_TESTER_GENERATOR_ARGS)

$(FILTERS_DIR)/metadata_tester_ucon.a: $(BIN_DIR)/metadata_tester.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g metadata_tester -f metadata_tester_ucon $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-user_context-no_runtime $(METADATA_TESTER_GENERATOR_ARGS)

# The AOT test links the user-context variant in addition to the default one.
$(BIN_DIR)/$(TARGET)/generator_aot_metadata_tester: $(FILTERS_DIR)/metadata_tester_ucon.a
1431
# multitarget is compiled for two target strings at once (with and without
# no_bounds_query) and requests a wider set of outputs via a second -e list.
$(FILTERS_DIR)/multitarget.a: $(BIN_DIR)/multitarget.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g multitarget -f "HalideTest::multitarget" $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_bounds_query-no_runtime-c_plus_plus_name_mangling,$(TARGET)-no_runtime-c_plus_plus_name_mangling \
		-e assembly,bitcode,c_source,c_header,stmt_html,static_library,stmt

# msan is generated with the msan feature and, unlike the other filters here,
# without no_runtime in its target string.
$(FILTERS_DIR)/msan.a: $(BIN_DIR)/msan.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g msan -f msan $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-msan

# user_context needs to be generated with user_context as the first argument to its calls
$(FILTERS_DIR)/user_context.a: $(BIN_DIR)/user_context.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g user_context $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-user_context

# ditto for user_context_insanity
$(FILTERS_DIR)/user_context_insanity.a: $(BIN_DIR)/user_context_insanity.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g user_context_insanity $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-user_context

# matlab needs to be generated with matlab in TARGET
$(FILTERS_DIR)/matlab.a: $(BIN_DIR)/matlab.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g matlab $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(@D) \
		target=$(TARGET)-no_runtime-matlab
1456
# Some .generators have additional dependencies (usually due to define_extern usage).
# These typically require two extra dependencies:
# (1) Ensuring the extra _generator.cpp is built into the .generator.
# (2) Ensuring the extra .a is linked into the final output.
#
# The rules below have no recipes; they only append prerequisites, which the
# generic link rules pick up through $^.

# TODO(srj): we really want to say "anything that depends on tiled_blur.a also depends on blur2x2.a";
# is there a way to specify that in Make?
$(BIN_DIR)/$(TARGET)/generator_aot_tiled_blur: $(FILTERS_DIR)/blur2x2.a
$(BIN_DIR)/$(TARGET)/generator_aot_cxx_mangling_define_extern: $(FILTERS_DIR)/cxx_mangling.a

$(BIN_DIR)/$(TARGET)/generator_aotcpp_tiled_blur: $(FILTERS_DIR)/blur2x2.halide_generated.cpp

# The GPU flavour of cxx_mangling is only linked in when CUDA testing is enabled.
ifneq ($(TEST_CUDA), )
$(BIN_DIR)/$(TARGET)/generator_aot_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_gpu.a
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_gpu.halide_generated.cpp
endif
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling: $(FILTERS_DIR)/cxx_mangling_externs.o
$(BIN_DIR)/$(TARGET)/generator_aotcpp_cxx_mangling_define_extern: \
	$(FILTERS_DIR)/cxx_mangling.halide_generated.cpp \
	$(FILTERS_DIR)/cxx_mangling_externs.o \
	$(FILTERS_DIR)/cxx_mangling_define_extern_externs.o

# stubuser is compiled against the stub headers of stubtest and configure, and
# its generator executable links their generator objects.
$(BUILD_DIR)/stubuser_generator.o: $(FILTERS_DIR)/stubtest.stub.h $(FILTERS_DIR)/configure.stub.h
$(BIN_DIR)/stubuser.generator: $(BUILD_DIR)/stubtest_generator.o $(BUILD_DIR)/configure_generator.o
1480# stubtest has input and output funcs with undefined types and array sizes; this is fine for stub
1481# usage (the types can be inferred), but for AOT compilation, we must make the types
1482# concrete via generator args.
1483STUBTEST_GENERATOR_ARGS=\
1484	untyped_buffer_input.type=uint8 untyped_buffer_input.dim=3 \
1485	simple_input.type=float32 \
1486	array_input.type=float32 array_input.size=2 \
1487	int_arg.size=2 \
1488	tuple_output.type=float32,float32 \
1489	vectorize=true
1490
1491$(FILTERS_DIR)/stubtest.a: $(BIN_DIR)/stubtest.generator
1492	@mkdir -p $(@D)
1493	$(CURDIR)/$< -g stubtest -f stubtest $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime $(STUBTEST_GENERATOR_ARGS)
1494
1495$(FILTERS_DIR)/external_code.a: $(BIN_DIR)/external_code.generator
1496	@mkdir -p $(@D)
1497	$(CURDIR)/$< -g external_code -e static_library,c_header,registration -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime external_code_is_bitcode=true
1498
1499$(FILTERS_DIR)/external_code.halide_generated.cpp: $(BIN_DIR)/external_code.generator
1500	@mkdir -p $(@D)
1501	$(CURDIR)/$< -g external_code -e c_source -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime external_code_is_bitcode=false
1502
# Second build of the autograd generator, emitted under the function name
# autograd_grad with "-d 1" and auto_schedule=true.
$(FILTERS_DIR)/autograd_grad.a: $(BIN_DIR)/autograd.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g autograd $(GEN_AOT_OUTPUTS) \
		-o $(CURDIR)/$(FILTERS_DIR) -f autograd_grad  -d 1 \
		target=$(TARGET)-no_runtime auto_schedule=true
1506
# Best practice is one Generator per .cpp file, with the generator name
# matching the filename. nested_externs_generators.cpp breaks that convention
# by holding several Generators (all named nested_externs_*), so it needs
# special casing. This rule builds any one of them from the single
# nested_externs.generator binary; the pattern stem selects the Generator.
$(FILTERS_DIR)/nested_externs_%.a: $(BIN_DIR)/nested_externs.generator
	@mkdir -p $(@D)
	$(CURDIR)/$< -g nested_externs_$* $(GEN_AOT_OUTPUTS) \
		-o $(CURDIR)/$(FILTERS_DIR) \
		target=$(TARGET)-no_runtime
1516
# Compiler, include-path and linker settings used by the AOT generator test rules.
GEN_AOT_CXX_FLAGS=$(TEST_CXX_FLAGS) -Wno-unknown-pragmas
GEN_AOT_INCLUDES=\
    -I$(INCLUDE_DIR) \
    -I$(FILTERS_DIR) \
    -I$(ROOT_DIR)/src/runtime \
    -I$(ROOT_DIR)/test/common \
    -I $(ROOT_DIR)/apps/support \
    -I $(SRC_DIR)/runtime \
    -I$(ROOT_DIR)/tools
GEN_AOT_LD_FLAGS=$(COMMON_LD_FLAGS)

# cuda and opencl locate their symbols dynamically at run time; metal, by
# contrast, has to be linked in for real.
ifneq ($(TEST_METAL), )
GEN_AOT_LD_FLAGS+=$(METAL_LD_FLAGS)
endif
1525
# Default AOT test binary: %_aottest.cpp plus $(FILTERS_DIR)/%.a and %.h and
# the standard runtime (libHalide is not a prerequisite). Only the .cpp/.o/.a
# prerequisites are handed to the compiler.
$(BIN_DIR)/$(TARGET)/generator_aot_%: \
    $(ROOT_DIR)/test/generator/%_aottest.cpp \
    $(FILTERS_DIR)/%.a \
    $(FILTERS_DIR)/%.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1530
# Same as generator_aot_%, but the test depends on the generated C++ source
# (%.halide_generated.cpp) instead of the static library.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_%: \
    $(ROOT_DIR)/test/generator/%_aottest.cpp \
    $(FILTERS_DIR)/%.halide_generated.cpp \
    $(FILTERS_DIR)/%.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1535
# The MSAN test does not link the standard runtime, so runtime.a is absent
# from its prerequisites; every prerequisite that is not a header is compiled/linked.
$(BIN_DIR)/$(TARGET)/generator_aot_msan: \
    $(ROOT_DIR)/test/generator/msan_aottest.cpp \
    $(FILTERS_DIR)/msan.a \
    $(FILTERS_DIR)/msan.h \
    $(RUNTIME_EXPORTED_INCLUDES)
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1540
# The alias test links a second filter (alias_with_offset_42) next to alias itself.
$(BIN_DIR)/$(TARGET)/generator_aot_alias: \
    $(ROOT_DIR)/test/generator/alias_aottest.cpp \
    $(FILTERS_DIR)/alias.a \
    $(FILTERS_DIR)/alias_with_offset_42.a \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_alias: \
    $(ROOT_DIR)/test/generator/alias_aottest.cpp \
    $(FILTERS_DIR)/alias.halide_generated.cpp \
    $(FILTERS_DIR)/alias_with_offset_42.halide_generated.cpp \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1549
# The autograd test links both autograd and autograd_grad.
$(BIN_DIR)/$(TARGET)/generator_aot_autograd: \
    $(ROOT_DIR)/test/generator/autograd_aottest.cpp \
    $(FILTERS_DIR)/autograd.a \
    $(FILTERS_DIR)/autograd_grad.a \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_autograd: \
    $(ROOT_DIR)/test/generator/autograd_aottest.cpp \
    $(FILTERS_DIR)/autograd.halide_generated.cpp \
    $(FILTERS_DIR)/autograd_grad.halide_generated.cpp \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1558
# The nested_externs test links four filters: root, inner, combine and leaf.
$(BIN_DIR)/$(TARGET)/generator_aot_nested_externs: \
    $(ROOT_DIR)/test/generator/nested_externs_aottest.cpp \
    $(FILTERS_DIR)/nested_externs_root.a \
    $(FILTERS_DIR)/nested_externs_inner.a \
    $(FILTERS_DIR)/nested_externs_combine.a \
    $(FILTERS_DIR)/nested_externs_leaf.a \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_nested_externs: \
    $(ROOT_DIR)/test/generator/nested_externs_aottest.cpp \
    $(FILTERS_DIR)/nested_externs_root.halide_generated.cpp \
    $(FILTERS_DIR)/nested_externs_inner.halide_generated.cpp \
    $(FILTERS_DIR)/nested_externs_combine.halide_generated.cpp \
    $(FILTERS_DIR)/nested_externs_leaf.halide_generated.cpp \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1567
# The matlab tests need a runtime built with "-matlab", so they link
# $(TARGET)-matlab/runtime.a and add $(TEST_LD_FLAGS).
$(BIN_DIR)/$(TARGET)/generator_aot_matlab: \
    $(ROOT_DIR)/test/generator/matlab_aottest.cpp \
    $(FILTERS_DIR)/matlab.a \
    $(FILTERS_DIR)/matlab.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)-matlab/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_matlab: \
    $(ROOT_DIR)/test/generator/matlab_aottest.cpp \
    $(FILTERS_DIR)/matlab.halide_generated.cpp \
    $(FILTERS_DIR)/matlab.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)-matlab/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@
1576
# The gpu object lifetime test needs the debug runtime, so it links
# $(TARGET)-debug/runtime.a and adds $(TEST_LD_FLAGS).
$(BIN_DIR)/$(TARGET)/generator_aot_gpu_object_lifetime: \
    $(ROOT_DIR)/test/generator/gpu_object_lifetime_aottest.cpp \
    $(FILTERS_DIR)/gpu_object_lifetime.a \
    $(FILTERS_DIR)/gpu_object_lifetime.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)-debug/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(TEST_LD_FLAGS) -o $@
1581
# acquire_release calls CUDA/OpenCL APIs directly, so both sets of linker
# flags are appended here.
$(BIN_DIR)/$(TARGET)/generator_aot_acquire_release: \
    $(ROOT_DIR)/test/generator/acquire_release_aottest.cpp \
    $(FILTERS_DIR)/acquire_release.a \
    $(FILTERS_DIR)/acquire_release.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_acquire_release: \
    $(ROOT_DIR)/test/generator/acquire_release_aottest.cpp \
    $(FILTERS_DIR)/acquire_release.halide_generated.cpp \
    $(FILTERS_DIR)/acquire_release.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) $(CUDA_LD_FLAGS) -o $@
1590
# define_extern_opencl calls OpenCL APIs directly, so $(OPENCL_LD_FLAGS) is
# appended to the link line.
$(BIN_DIR)/$(TARGET)/generator_aot_define_extern_opencl: \
    $(ROOT_DIR)/test/generator/define_extern_opencl_aottest.cpp \
    $(FILTERS_DIR)/define_extern_opencl.a \
    $(FILTERS_DIR)/define_extern_opencl.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) -o $@

# C++-source flavour of the same test.
$(BIN_DIR)/$(TARGET)/generator_aotcpp_define_extern_opencl: \
    $(ROOT_DIR)/test/generator/define_extern_opencl_aottest.cpp \
    $(FILTERS_DIR)/define_extern_opencl.halide_generated.cpp \
    $(FILTERS_DIR)/define_extern_opencl.h \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(BIN_DIR)/$(TARGET)/runtime.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) $(OPENCL_LD_FLAGS) -o $@
1599
# External tests that use the JIT. By default %_jittest.cpp depends on
# libHalide, Halide.h, the Generator's stub header and its generator object.
$(BIN_DIR)/generator_jit_%: \
    $(ROOT_DIR)/test/generator/%_jittest.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(FILTERS_DIR)/%.stub.h \
    $(BUILD_DIR)/%_generator.o
	@mkdir -p $(@D)
	$(CXX) -g $(TEST_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) -I$(INCLUDE_DIR) -I$(FILTERS_DIR) -I $(ROOT_DIR)/apps/support $(TEST_LD_FLAGS) -o $@
1604
# Runs the multitarget test binary twice, once with
# HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=0 and once with it set to 1.
generator_aot_multitarget: $(BIN_DIR)/$(TARGET)/generator_aot_multitarget
	@mkdir -p $(@D)
	HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=0 \
		$(CURDIR)/$<
	HL_MULTITARGET_TEST_USE_NOBOUNDSQUERY_FEATURE=1 \
		$(CURDIR)/$<
	@-echo
1611
# There is no generator actually named "nested_externs", and it has no
# internal tests anyway, so this target only prints a message.
test_generator_nested_externs:
	@echo "Skipping"
1616
# Compile the RunGen driver. "-g" is filtered out of the test flags and
# $(OPTIMIZE) -Os is appended.
$(BUILD_DIR)/RunGenMain.o: \
    $(ROOT_DIR)/tools/RunGenMain.cpp \
    $(RUNTIME_EXPORTED_INCLUDES) \
    $(ROOT_DIR)/tools/RunGen.h
	@mkdir -p $(@D)
	$(CXX) -c $< $(filter-out -g, $(TEST_CXX_FLAGS)) $(OPTIMIZE) -Os $(IMAGE_IO_CXX_FLAGS) \
		-I$(INCLUDE_DIR) -I $(SRC_DIR)/runtime -I$(ROOT_DIR)/tools -o $@
1620
# Compile a filter's generated registration source into an object file.
$(FILTERS_DIR)/%.registration.o: $(FILTERS_DIR)/%.registration.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< \
		$(TEST_CXX_FLAGS) -o $@
1624
# Link a RunGen executable for a single filter. The registration object goes
# through $(call alwayslink,...) so it is force-linked.
$(FILTERS_DIR)/%.rungen: \
    $(BUILD_DIR)/RunGenMain.o \
    $(BIN_DIR)/$(TARGET)/runtime.a \
    $(FILTERS_DIR)/%.registration.o \
    $(FILTERS_DIR)/%.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) \
		$< \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/$*.registration.o) \
		$(FILTERS_DIR)/$*.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1633
# Arguments passed to the RunGen executable by the %.run rule; empty unless
# the caller sets RUNARGS.
RUNARGS ?=

# Execute the filter's RunGen binary with $(RUNARGS).
$(FILTERS_DIR)/%.run: $(FILTERS_DIR)/%.rungen
	$(CURDIR)/$< \
		$(RUNARGS)
	@-echo
1639
# Like %.registration.o, but compiled with
# HALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC defined to a per-filter name.
$(FILTERS_DIR)/%.registration_extra.o: $(FILTERS_DIR)/%.registration.cpp
	@mkdir -p $(@D)
	$(CXX) -c $< $(TEST_CXX_FLAGS) \
		-DHALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC=halide_register_extra_key_value_pairs_$* \
		-o $@
1643
# Exercises the registration mechanism on its own, without RunGen.
# It links the registration_extra.o objects (not registration.o) because those
# are the ones compiled with HALIDE_REGISTER_EXTRA_KEY_VALUE_PAIRS_FUNC defined.
$(FILTERS_DIR)/registration_test: \
    $(ROOT_DIR)/test/generator/registration_test.cpp \
    $(BIN_DIR)/$(TARGET)/runtime.a \
    $(FILTERS_DIR)/blur2x2.registration_extra.o \
    $(FILTERS_DIR)/blur2x2.a \
    $(FILTERS_DIR)/cxx_mangling.registration_extra.o \
    $(FILTERS_DIR)/cxx_mangling.a \
    $(FILTERS_DIR)/pyramid.registration_extra.o \
    $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(GEN_AOT_INCLUDES) \
		$(ROOT_DIR)/test/generator/registration_test.cpp \
		$(FILTERS_DIR)/blur2x2.registration_extra.o \
		$(FILTERS_DIR)/cxx_mangling.registration_extra.o \
		$(FILTERS_DIR)/pyramid.registration_extra.o \
		$(FILTERS_DIR)/blur2x2.a \
		$(FILTERS_DIR)/cxx_mangling.a \
		$(FILTERS_DIR)/pyramid.a \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1663
# Test of RunGen itself, built against the "example" filter.
$(FILTERS_DIR)/rungen_test: \
    $(ROOT_DIR)/test/generator/rungen_test.cpp \
    $(BIN_DIR)/$(TARGET)/runtime.a \
    $(FILTERS_DIR)/example.registration.o \
    $(FILTERS_DIR)/example.a
	@mkdir -p $(@D)
	$(CXX) $(GEN_AOT_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(GEN_AOT_INCLUDES) \
		$(ROOT_DIR)/test/generator/rungen_test.cpp \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/example.registration.o) \
		$(FILTERS_DIR)/example.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1676
# Links three filters (blur2x2, cxx_mangling, pyramid) into one RunGen binary.
$(FILTERS_DIR)/multi_rungen: \
    $(BUILD_DIR)/RunGenMain.o \
    $(BIN_DIR)/$(TARGET)/runtime.a \
    $(FILTERS_DIR)/blur2x2.registration.o \
    $(FILTERS_DIR)/blur2x2.a \
    $(FILTERS_DIR)/cxx_mangling.registration.o \
    $(FILTERS_DIR)/cxx_mangling.a \
    $(FILTERS_DIR)/pyramid.registration.o \
    $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) \
		$(BUILD_DIR)/RunGenMain.o \
		$(BIN_DIR)/$(TARGET)/runtime.a \
		$(call alwayslink,$(FILTERS_DIR)/blur2x2.registration.o) \
		$(call alwayslink,$(FILTERS_DIR)/cxx_mangling.registration.o) \
		$(call alwayslink,$(FILTERS_DIR)/pyramid.registration.o) \
		$(FILTERS_DIR)/blur2x2.a \
		$(FILTERS_DIR)/cxx_mangling.a \
		$(FILTERS_DIR)/pyramid.a \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1693
# Concatenating several registration files into one should work too:
# multi_rungen2.registration.cpp is simply the three inputs cat'ed together.
$(FILTERS_DIR)/multi_rungen2.registration.cpp: \
    $(FILTERS_DIR)/blur2x2.registration.cpp \
    $(FILTERS_DIR)/cxx_mangling.registration.cpp \
    $(FILTERS_DIR)/pyramid.registration.cpp
	cat $^ > $@

# Every prerequisite (objects, archives and the concatenated .cpp) is passed
# to the compiler in prerequisite order.
$(FILTERS_DIR)/multi_rungen2: \
    $(BUILD_DIR)/RunGenMain.o \
    $(BIN_DIR)/$(TARGET)/runtime.a \
    $(FILTERS_DIR)/multi_rungen2.registration.cpp \
    $(FILTERS_DIR)/blur2x2.a \
    $(FILTERS_DIR)/cxx_mangling.a \
    $(FILTERS_DIR)/pyramid.a
	@mkdir -p $(@D)
	$(CXX) -std=c++11 -I$(FILTERS_DIR) $^ \
		$(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1705
# Build a tutorial binary from tutorial/%.cpp.
#
# Targets ending in "_run" are the second half of a generate/run pair: the
# matching tutorial_..._generate target is run first through a sub-make, then
# the runner is linked against the $(TMP_DIR)/<lesson>_*.a libraries, where
# <lesson> is the first 9 bytes of the stem (e.g. "lesson_10").
# Every other tutorial is compiled and linked with $(TEST_LD_FLAGS).
#
# The sub-make is invoked as $(MAKE), not a literal "make", so the same make
# binary is used and the jobserver is shared. It is chained with "&&" so a
# failed generate step stops the recipe instead of linking stale or missing
# libraries.
$(BIN_DIR)/tutorial_%: $(ROOT_DIR)/tutorial/%.cpp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h $(INCLUDE_DIR)/HalideRuntime.h
	@ if [[ $@ == *_run ]]; then \
		export TUTORIAL=$* ;\
		export LESSON=`echo $${TUTORIAL} | cut -b1-9`; \
		$(MAKE) -f $(THIS_MAKEFILE) tutorial_$${TUTORIAL/run/generate} && \
		$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(TMP_DIR) -I$(INCLUDE_DIR) $(TMP_DIR)/$${LESSON}_*.a $(GEN_AOT_LD_FLAGS) $(IMAGE_IO_LIBS) -lz -o $@; \
	else \
		$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@;\
	fi
1717
# Lesson 15 is a generator, so it is linked together with GenGen.o.
$(BIN_DIR)/tutorial_lesson_15_generators: \
    $(ROOT_DIR)/tutorial/lesson_15_generators.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) \
	$< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1721
# Run the lesson 15 usage script: copy the generator binary into $(TMP_DIR) as
# lesson_15_generate, change into $(TMP_DIR), and source the script with
# $(BIN_DIR) appended to PATH.
# The three steps are chained with "&&" so that a failed copy or cd aborts the
# recipe instead of sourcing the script from the wrong directory.
tutorial_lesson_15_generators: $(ROOT_DIR)/tutorial/lesson_15_generators_usage.sh $(BIN_DIR)/tutorial_lesson_15_generators
	@-mkdir -p $(TMP_DIR)
	cp $(BIN_DIR)/tutorial_lesson_15_generators $(TMP_DIR)/lesson_15_generate && \
	cd $(TMP_DIR) && \
	PATH="$${PATH}:$(CURDIR)/$(BIN_DIR)" source $(ROOT_DIR)/tutorial/lesson_15_generators_usage.sh
	@-echo
1728
# Generator half of lesson 16, linked together with GenGen.o.
$(BIN_DIR)/tutorial_lesson_16_rgb_generate: \
    $(ROOT_DIR)/tutorial/lesson_16_rgb_generate.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) \
	$< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1732
# Runner half of lesson 16. The generator is invoked four times, once per
# layout; only the first (planar) invocation uses plain target=host, the other
# three use host-no_runtime. The runner is then linked against every
# $(TMP_DIR)/brighten_*.a.
$(BIN_DIR)/tutorial_lesson_16_rgb_run: \
    $(ROOT_DIR)/tutorial/lesson_16_rgb_run.cpp \
    $(BIN_DIR)/tutorial_lesson_16_rgb_generate
	@-mkdir -p $(TMP_DIR)
	# Run the generator
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_planar target=host layout=planar
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_interleaved target=host-no_runtime layout=interleaved
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_either target=host-no_runtime layout=either
	$(BIN_DIR)/tutorial_lesson_16_rgb_generate -g brighten -o $(TMP_DIR) -f brighten_specialized target=host-no_runtime layout=specialized
	# Compile the runner
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) -L$(BIN_DIR) -I $(TMP_DIR) $(TMP_DIR)/brighten_*.a \
		-lHalide $(TEST_LD_FLAGS) $(COMMON_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
	@-echo
1745
# Generator half of lesson 21, linked together with GenGen.o.
$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate: \
    $(ROOT_DIR)/tutorial/lesson_21_auto_scheduler_generate.cpp \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(BUILD_DIR)/GenGen.o
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) \
	$< $(BUILD_DIR)/GenGen.o \
	-I$(INCLUDE_DIR) $(TEST_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
1749
# MachineParams is a comma-separated triple, in this order:
#   1. the maximum level of parallelism available,
#   2. the size of the last-level cache (in bytes),
#   3. the ratio between the cost of a miss at the last level cache and the
#      cost of arithmetic on the target architecture.
LESSON_21_MACHINE_PARAMS = 32,16777216,40

# Runner half of lesson 21. The generator is invoked twice (auto_schedule=false
# with target=host, then auto_schedule=true with host-no_runtime and the
# machine params above); the runner is linked against every
# $(TMP_DIR)/auto_schedule_*.a.
$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_run: \
    $(ROOT_DIR)/tutorial/lesson_21_auto_scheduler_run.cpp \
    $(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate
	@-mkdir -p $(TMP_DIR)
	# Run the generator
	$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate -g auto_schedule_gen -o $(TMP_DIR) -e static_library,c_header,schedule -f auto_schedule_false target=host auto_schedule=false
	$(BIN_DIR)/tutorial_lesson_21_auto_scheduler_generate -g auto_schedule_gen -o $(TMP_DIR) -e static_library,c_header,schedule -f auto_schedule_true target=host-no_runtime auto_schedule=true machine_params=$(LESSON_21_MACHINE_PARAMS)
	# Compile the runner
	$(CXX) $(TUTORIAL_CXX_FLAGS) $(IMAGE_IO_CXX_FLAGS) $(OPTIMIZE_FOR_BUILD_TIME) $< \
		-I$(INCLUDE_DIR) -L$(BIN_DIR) -I $(TMP_DIR) $(TMP_DIR)/auto_schedule_*.a \
		-lHalide $(TEST_LD_FLAGS) $(COMMON_LD_FLAGS) $(IMAGE_IO_LIBS) -o $@
	@-echo
1768
# Run the internal test binary from inside $(TMP_DIR).
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the test in the current directory.
test_internal: $(BIN_DIR)/test_internal
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1773
# Run one correctness test binary from inside $(TMP_DIR).
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the test in the current directory.
correctness_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

# correctness_opencl_runtime is built under the per-target directory, so it
# has its own explicit rule.
correctness_opencl_runtime: $(BIN_DIR)/$(TARGET)/correctness_opencl_runtime
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1783
# Quiet variant of correctness_%: stdout/stderr are captured to
# stdout_<test>.txt / stderr_<test>.txt in $(TMP_DIR). Success prints a single
# "."; failure prints "FAILED TEST: <test>" followed by both captured files
# and fails the recipe.
# The cd is chained with "&&" so a failed cd is reported as a failed test
# instead of running (and writing the capture files) in the current directory.
quiet_correctness_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	@cd $(TMP_DIR) && ( $(CURDIR)/$< 2>stderr_$*.txt > stdout_$*.txt && echo -n . ) || ( echo ; echo FAILED TEST: $* ; cat stdout_$*.txt stderr_$*.txt ; false )
1787
# Run one correctness test under valgrind, with --error-exitcode set so that
# valgrind-detected errors produce a non-zero exit status.
# "&&" (rather than ";") makes a failed cd abort the recipe.
valgrind_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && valgrind --error-exitcode=-1 $(CURDIR)/$<
	@-echo
1792
# Use Intel SDE to emulate an avx 512 processor. The test is run twice, once
# with "sde -cnl" and once with "sde -knl".
# "&&" (rather than ";") makes a failed cd abort the recipe.
avx512_%: $(BIN_DIR)/correctness_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && sde -cnl -- $(CURDIR)/$<
	cd $(TMP_DIR) && sde -knl -- $(CURDIR)/$<
	@-echo
1799
# tracing_stack is *supposed* to do an out-of-bounds read, so this explicit
# rule overrides valgrind_% and runs the test directly, without valgrind.
# "&&" (rather than ";") makes a failed cd abort the recipe.
valgrind_tracing_stack: $(BIN_DIR)/correctness_tracing_stack
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$(BIN_DIR)/correctness_tracing_stack
	@-echo
1805
# Run one performance test binary from inside $(TMP_DIR).
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the test in the current directory.
performance_%: $(BIN_DIR)/performance_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1810
# Error tests pass when the binary's combined stdout/stderr contains one of
# the expected failure messages; the recipe's status is grep's status.
# "grep -E -q" replaces the obsolescent "egrep --q" spelling, and "&&" makes a
# failed cd abort the recipe.
error_%: $(BIN_DIR)/error_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1 | grep -E -q "terminating with uncaught exception|^terminate called|^Error|Assertion.*failed"
	@-echo

# Warning tests pass when some output line starts with "Warning".
warning_%: $(BIN_DIR)/warning_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1 | grep -E -q "^Warning"
	@-echo
1820
# Run one opengl test binary from inside $(TMP_DIR), with stderr merged into
# stdout. "&&" (rather than ";") makes a failed cd abort the recipe.
opengl_%: $(BIN_DIR)/opengl_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$< 2>&1
	@-echo
1825
# Runners for the generator tests: each one executes the corresponding binary
# from inside $(TMP_DIR). JIT test binaries live directly in $(BIN_DIR); the
# AOT ones live under $(BIN_DIR)/$(TARGET).
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the test in the current directory.
generator_jit_%: $(BIN_DIR)/generator_jit_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

generator_aot_%: $(BIN_DIR)/$(TARGET)/generator_aot_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo

generator_aotcpp_%: $(BIN_DIR)/$(TARGET)/generator_aotcpp_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1840
# Copy a tutorial input image into $(TMP_DIR)/images.
$(TMP_DIR)/images/%.png: $(ROOT_DIR)/tutorial/images/%.png
	@-mkdir -p $(@D)
	cp $< $@
1844
# Run one tutorial binary from inside $(TMP_DIR), after copying rgb.png and
# gray.png into $(TMP_DIR)/images.
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the tutorial in the current directory.
tutorial_%: $(BIN_DIR)/tutorial_% $(TMP_DIR)/images/rgb.png $(TMP_DIR)/images/gray.png
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1849
# Run one auto_schedule test binary from inside $(TMP_DIR).
# "&&" (rather than ";") makes a failed cd abort the recipe instead of running
# the test in the current directory.
auto_schedule_%: $(BIN_DIR)/auto_schedule_%
	@-mkdir -p $(TMP_DIR)
	cd $(TMP_DIR) && $(CURDIR)/$<
	@-echo
1854
# Compile-time measurement targets. Each one runs $(TIME_COMPILATION) with a
# csv file name followed by a sub-make of the matching ordinary target.
# The sub-make is invoked as $(MAKE), not a literal "make", so the same make
# binary is used and the jobserver is shared. $* is the pattern stem, i.e. the
# test or generator name.
time_compilation_test_%: $(BIN_DIR)/test_%
	$(TIME_COMPILATION) compile_times_correctness.csv $(MAKE) -f $(THIS_MAKEFILE) test_$*

time_compilation_performance_%: $(BIN_DIR)/performance_%
	$(TIME_COMPILATION) compile_times_performance.csv $(MAKE) -f $(THIS_MAKEFILE) performance_$*

time_compilation_opengl_%: $(BIN_DIR)/opengl_%
	$(TIME_COMPILATION) compile_times_opengl.csv $(MAKE) -f $(THIS_MAKEFILE) opengl_$*

# For generators the timed sub-make target is the filter library itself.
time_compilation_generator_%: $(BIN_DIR)/%.generator
	$(TIME_COMPILATION) compile_times_generator.csv $(MAKE) -f $(THIS_MAKEFILE) $(FILTERS_DIR)/$*.a
1866
# Apps (subdirectories of apps/) that are built and tested by
# build_apps / test_apps.
TEST_APPS=\
	HelloMatlab \
	autoscheduler \
	bilateral_grid \
	bgu \
	blur \
	c_backend \
	camera_pipe \
	conv_layer \
	fft \
	gradient_autoscheduler \
	hist \
	interpolate \
	lens_blur \
	linear_algebra \
	local_laplacian \
	max_filter \
	nl_means \
	onnx \
	resize \
	resnet_50 \
	stencil_chain \
	wavelet

# One <app>_test_app and one <app>_build_app target name per app.
TEST_APPS_DEPS=$(addsuffix _test_app,$(TEST_APPS))
BUILD_APPS_DEPS=$(addsuffix _build_app,$(TEST_APPS))
1893
# Build one app: run the "build" target of apps/<app>/Makefile, pointing it at
# the distrib tree, the python bindings and a per-app BIN_DIR.
# The recipe used to end in "|| exit 1 ; \", a dangling line continuation; a
# failing $(MAKE) already fails the recipe, so that tail is dropped.
$(BUILD_APPS_DEPS): distrib build_python_bindings
	@echo Building app $(@:%_build_app=%) for ${HL_TARGET}...
	@$(MAKE) -C $(ROOT_DIR)/apps/$(@:%_build_app=%) build \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$(@:%_build_app=%)/bin \
		HL_TARGET=$(HL_TARGET)
1902
# Test one app: run the "test" target of apps/<app>/Makefile, pointing it at
# the distrib tree, the python bindings and a per-app BIN_DIR.
# The recipe used to end in "|| exit 1 ; \", a dangling line continuation; a
# failing $(MAKE) already fails the recipe, so that tail is dropped.
$(TEST_APPS_DEPS): distrib build_python_bindings
	@echo Testing app $(@:%_test_app=%) for ${HL_TARGET}...
	@$(MAKE) -C $(ROOT_DIR)/apps/$(@:%_test_app=%) test \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$(@:%_test_app=%)/bin \
		HL_TARGET=$(HL_TARGET)
1911
# None of these targets produce a file of the same name, so all of them are
# declared phony, including the per-app <app>_test_app targets, which were
# previously missing from this list.
.PHONY: test_apps build_apps $(BUILD_APPS_DEPS) $(TEST_APPS_DEPS)

# Build every app listed in TEST_APPS.
build_apps: $(BUILD_APPS_DEPS)

# Build every app first, then run the app tests through a sub-make limited to
# a single job (-j1).
test_apps: $(BUILD_APPS_DEPS)
	$(MAKE) -f $(THIS_MAKEFILE) -j1 $(TEST_APPS_DEPS)
1917
# Apps that take part in "make benchmark_apps".
BENCHMARK_APPS=\
	bilateral_grid \
	camera_pipe \
	lens_blur \
	local_laplacian \
	nl_means \
	stencil_chain

# Build the <app>.rungen binary of one benchmark app via the app's own
# Makefile. The sub-make's stdout is discarded.
$(BENCHMARK_APPS): distrib build_python_bindings
	@echo Building $@ for ${HL_TARGET}...
	@$(MAKE) \
		-C $(ROOT_DIR)/apps/$@ \
		$(CURDIR)/$(BIN_DIR)/apps/$@/bin/$(HL_TARGET)/$@.rungen \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$@/bin \
		HL_TARGET=$(HL_TARGET) \
		> /dev/null \
		|| exit 1
1936
# Run the <app>.benchmark target of every benchmark app, one after another,
# stopping at the first failure.
# The sub-make is invoked as $(MAKE), not a literal "make", matching the
# per-app build rule, so the same make binary is used and the jobserver is
# shared.
.PHONY: benchmark_apps $(BENCHMARK_APPS)
benchmark_apps: $(BENCHMARK_APPS)
	@for APP in $(BENCHMARK_APPS); do \
		echo ;\
		echo Benchmarking $${APP} for ${HL_TARGET}... ; \
		$(MAKE) -C $(ROOT_DIR)/apps/$${APP} \
			$${APP}.benchmark \
			HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
			HALIDE_PYTHON_BINDINGS_PATH=$(CURDIR)/$(BIN_DIR)/python3_bindings \
			BIN_DIR=$(CURDIR)/$(BIN_DIR)/apps/$${APP}/bin \
			HL_TARGET=$(HL_TARGET) \
			|| exit 1 ; \
	done
1950
# TODO(srj): the python bindings should live in the distrib folders; building
# them into $(BIN_DIR)/python3_bindings is a hopefully-temporary workaround
# (https://github.com/halide/Halide/issues/4368)
.PHONY: build_python_bindings
build_python_bindings: distrib $(BIN_DIR)/host/runtime.a
	$(MAKE) \
		-C $(ROOT_DIR)/python_bindings \
		-f $(ROOT_DIR)/python_bindings/Makefile \
		build_python_bindings \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		BIN=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		PYTHON=$(PYTHON) \
		OPTIMIZE=$(OPTIMIZE)
1962
# Run the "test" target of the python bindings Makefile against the freshly
# built bindings.
.PHONY: test_python
test_python: distrib $(BIN_DIR)/host/runtime.a build_python_bindings
	$(MAKE) \
		-C $(ROOT_DIR)/python_bindings \
		-f $(ROOT_DIR)/python_bindings/Makefile \
		test \
		HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
		BIN=$(CURDIR)/$(BIN_DIR)/python3_bindings \
		PYTHON=$(PYTHON) \
		OPTIMIZE=$(OPTIMIZE)
1972
# Clang is only used for compiling the runtime, so earlier clangs *might*
# work, but it is best pegged to the minimum llvm version.
# CLANG_OK is set when $(CLANG_VERSION) contains "clang version X" for any X
# in the list below. $(strip) collapses the all-empty foreach result to an
# empty string so the comparison only succeeds on a real match.
ifneq (,$(strip $(foreach v,3.7 3.8 4.0 5.0 6.0 7.0 7.1 8.0 9.0 10.0 11.0 12.0,$(findstring clang version $(v),$(CLANG_VERSION)))))
CLANG_OK=yes
endif

# Apple's toolchain reports its version with a different banner.
ifneq (,$(findstring Apple LLVM version 5.0,$(CLANG_VERSION)))
CLANG_OK=yes
endif
2026
# Stamp file recording that clang passed the version check. When CLANG_OK is
# empty the recipe prints diagnostics and fails instead of creating the stamp.
$(BUILD_DIR)/clang_ok:
ifneq ($(CLANG_OK), )
	@echo "Found a new enough version of clang"
	mkdir -p $(@D)
	touch $@
else
	@echo "Can't find clang or version of clang too old (we need 3.7 or greater):"
	@echo "You can override this check by setting CLANG_OK=y"
	echo '$(CLANG_VERSION)'
	echo $(findstring version 3,$(CLANG_VERSION))
	echo $(findstring version 3.0,$(CLANG_VERSION))
	$(CLANG) --version
	@exit 1
endif
2042
# LLVM_OK is set when $(LLVM_VERSION_TIMES_10) is exactly one of the supported
# values. $(filter) compares whole words; the previous $(findstring) did
# substring matching, so values such as "10" or "20" were wrongly accepted
# (they occur inside "100" and "120").
ifneq (,$(filter $(LLVM_VERSION_TIMES_10),90 100 110 120))
LLVM_OK=yes
endif

# Stamp file recording that llvm passed the version check. The RTTI check is a
# prerequisite only in the successful case; otherwise the recipe prints the
# detected version and fails.
ifneq ($(LLVM_OK), )
$(BUILD_DIR)/llvm_ok: $(BUILD_DIR)/rtti_ok
	@echo "Found a new enough version of llvm"
	mkdir -p $(BUILD_DIR)
	touch $(BUILD_DIR)/llvm_ok
else
$(BUILD_DIR)/llvm_ok:
	@echo "Can't find llvm or version of llvm too old (we need 9.0 or greater):"
	@echo "You can override this check by setting LLVM_OK=y"
	$(LLVM_CONFIG) --version
	@exit 1
endif
2059
# RTTI_OK stays unset only when WITH_RTTI is set and LLVM_HAS_NO_RTTI is also
# set; in every other case it is "yes".
# The explanatory comments sit on their own lines: a comment trailing the
# assignment would make the whitespace before "#" part of the value.
ifneq ($(WITH_RTTI), )
ifeq ($(LLVM_HAS_NO_RTTI), )
# Enabled in Halide and LLVM
RTTI_OK=yes
endif
else
# Enabled in LLVM but not in Halide
RTTI_OK=yes
endif
2068
# Stamp file recording that the RTTI configuration is usable. When RTTI_OK is
# empty the recipe prints the LLVM c++ flags and fails.
$(BUILD_DIR)/rtti_ok:
ifneq ($(RTTI_OK), )
	mkdir -p $(@D)
	touch $@
else
	@echo "Can't enable RTTI - llvm was compiled without it."
	@echo "LLVM c++ flags: " $(LLVM_CXX_FLAGS)
	@exit 1
endif
2079
# Install libraries, headers, tutorials and tools under $(PREFIX). Files bound
# for the same destination directory are copied with a single cp. On Darwin
# the installed shared library's install name is rewritten to its final path.
install: \
    $(LIB_DIR)/libHalide.a \
    $(BIN_DIR)/libHalide.$(SHARED_EXT) \
    $(INCLUDE_DIR)/Halide.h \
    $(RUNTIME_EXPORTED_INCLUDES)
	mkdir -p \
		$(PREFIX)/include \
		$(PREFIX)/bin \
		$(PREFIX)/lib \
		$(PREFIX)/share/halide/tutorial/images \
		$(PREFIX)/share/halide/tools \
		$(PREFIX)/share/halide/tutorial/figures
	cp $(LIB_DIR)/libHalide.a $(BIN_DIR)/libHalide.$(SHARED_EXT) $(PREFIX)/lib
	cp $(INCLUDE_DIR)/Halide.h \
		$(INCLUDE_DIR)/HalideBuffer.h \
		$(INCLUDE_DIR)/HalideRuntim*.h \
		$(PREFIX)/include
	cp $(ROOT_DIR)/tutorial/images/*.png $(PREFIX)/share/halide/tutorial/images
	cp $(ROOT_DIR)/tutorial/figures/*.gif \
		$(ROOT_DIR)/tutorial/figures/*.jpg \
		$(ROOT_DIR)/tutorial/figures/*.mp4 \
		$(PREFIX)/share/halide/tutorial/figures
	cp $(ROOT_DIR)/tutorial/*.cpp \
		$(ROOT_DIR)/tutorial/*.h \
		$(ROOT_DIR)/tutorial/*.sh \
		$(PREFIX)/share/halide/tutorial
	cp $(ROOT_DIR)/tools/mex_halide.m \
		$(ROOT_DIR)/tools/GenGen.cpp \
		$(ROOT_DIR)/tools/RunGen.h \
		$(ROOT_DIR)/tools/RunGenMain.cpp \
		$(ROOT_DIR)/tools/halide_image.h \
		$(ROOT_DIR)/tools/halide_image_io.h \
		$(ROOT_DIR)/tools/halide_image_info.h \
		$(ROOT_DIR)/tools/halide_malloc_trace.h \
		$(PREFIX)/share/halide/tools
ifeq ($(UNAME), Darwin)
	install_name_tool -id $(PREFIX)/lib/libHalide.$(SHARED_EXT) $(PREFIX)/lib/libHalide.$(SHARED_EXT)
endif
2104
# Specialized 'install' for users who also need the Hexagon support libraries:
# after the regular install it copies the Hexagon runtime libs under
# $(PREFIX)/lib and creates symlinks for GenGen.cpp, hexagon_sim_remote and the
# two libsim_qurt archives.
install_qc: install $(HEXAGON_RUNTIME_LIBS)
	mkdir -p \
		$(PREFIX)/lib/arm-32-android \
		$(PREFIX)/lib/arm-64-android \
		$(PREFIX)/lib/host \
		$(PREFIX)/lib/v62 \
		$(PREFIX)/tools
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/arm-32-android/* $(PREFIX)/lib/arm-32-android
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/arm-64-android/* $(PREFIX)/lib/arm-64-android
	cp $(HEXAGON_RUNTIME_LIBS_DIR)/host/* $(PREFIX)/lib/host
	cp -r $(HEXAGON_RUNTIME_LIBS_DIR)/v62/* $(PREFIX)/lib/v62
	ln -sf $(PREFIX)/share/halide/tools/GenGen.cpp $(PREFIX)/tools/GenGen.cpp
	ln -sf $(PREFIX)/lib/v62/hexagon_sim_remote $(PREFIX)/bin/hexagon_sim_remote
	ln -sf $(PREFIX)/lib/v62/libsim_qurt.a $(PREFIX)/lib/libsim_qurt.a
	ln -sf $(PREFIX)/lib/v62/libsim_qurt_vtcm.a $(PREFIX)/lib/libsim_qurt_vtcm.a
2116
# Downstream consumers of our build rules should not have to guess which
# system libraries they need to link against, so the values obtained from
# llvm-config are captured in config files that ship with the distribution.
HALIDE_RTTI_RAW=$(if $(WITH_RTTI),1,0)

# Instantiate a config template by substituting the three @...@ placeholders.
# A single sed reads the template directly, so a missing or unreadable
# template fails the recipe (the old "cat | sed | sed | sed" pipeline only
# reported the status of the last sed). All three expressions use ";" as the
# delimiter, so a "/" inside $(LLVM_SYSTEM_LIBS), e.g. a library given by
# path, no longer breaks the substitution.
$(BUILD_DIR)/halide_config.%: $(ROOT_DIR)/tools/halide_config.%.tpl
	@mkdir -p $(@D)
	sed -e 's;@HALIDE_SYSTEM_LIBS_RAW@;${LLVM_SYSTEM_LIBS};g' \
	    -e 's;@HALIDE_RTTI_RAW@;${HALIDE_RTTI_RAW};g' \
	    -e 's;@HALIDE_LLVM_CXX_FLAGS_RAW@;${LLVM_CXX_FLAGS};g' \
	    $< > $@
2129
2130
# Assemble the binary distribution. The recipe wipes $(DISTRIB_DIR),
# stages the libraries, headers, tutorial, tools, READMEs and generated
# config files into it, and then packs them into halide.tgz. A temporary
# symlink named "halide" in the current directory makes every path in the
# archive start with "halide/"; it is removed once tar has run. The
# archive is written to $(BUILD_DIR) first and moved into $(DISTRIB_DIR)
# as the last step.
$(DISTRIB_DIR)/halide.tgz: $(LIB_DIR)/libHalide.a \
                           $(BIN_DIR)/libHalide.$(SHARED_EXT) \
                           $(INCLUDE_DIR)/Halide.h \
                           $(RUNTIME_EXPORTED_INCLUDES) \
                           $(ROOT_DIR)/README*.md \
                           $(BUILD_DIR)/halide_config.cmake \
                           $(BUILD_DIR)/halide_config.make
	rm -rf $(DISTRIB_DIR)
	mkdir -p $(addprefix $(DISTRIB_DIR)/,include bin lib tutorial tutorial/images tools tutorial/figures)
	cp $(BIN_DIR)/libHalide.$(SHARED_EXT) $(DISTRIB_DIR)/bin
	cp $(LIB_DIR)/libHalide.a $(DISTRIB_DIR)/lib
	cp $(INCLUDE_DIR)/Halide.h \
	   $(INCLUDE_DIR)/HalideBuffer.h \
	   $(INCLUDE_DIR)/HalideRuntim*.h \
	   $(INCLUDE_DIR)/HalidePyTorch*.h \
	   $(DISTRIB_DIR)/include
	cp $(ROOT_DIR)/tutorial/images/*.png $(DISTRIB_DIR)/tutorial/images
	cp $(ROOT_DIR)/tutorial/figures/*.gif \
	   $(ROOT_DIR)/tutorial/figures/*.jpg \
	   $(ROOT_DIR)/tutorial/figures/*.mp4 \
	   $(DISTRIB_DIR)/tutorial/figures
	cp $(ROOT_DIR)/tutorial/*.cpp \
	   $(ROOT_DIR)/tutorial/*.h \
	   $(ROOT_DIR)/tutorial/*.sh \
	   $(DISTRIB_DIR)/tutorial
	cp $(ROOT_DIR)/tools/mex_halide.m \
	   $(ROOT_DIR)/tools/GenGen.cpp \
	   $(ROOT_DIR)/tools/RunGen.h \
	   $(ROOT_DIR)/tools/RunGenMain.cpp \
	   $(ROOT_DIR)/tools/halide_benchmark.h \
	   $(ROOT_DIR)/tools/halide_image.h \
	   $(ROOT_DIR)/tools/halide_image_io.h \
	   $(ROOT_DIR)/tools/halide_image_info.h \
	   $(ROOT_DIR)/tools/halide_malloc_trace.h \
	   $(ROOT_DIR)/tools/halide_trace_config.h \
	   $(DISTRIB_DIR)/tools
	cp $(ROOT_DIR)/README*.md $(DISTRIB_DIR)
	cp $(BUILD_DIR)/halide_config.* $(DISTRIB_DIR)
	ln -sf $(DISTRIB_DIR) halide
	tar -czf $(BUILD_DIR)/halide.tgz $(addprefix halide/,bin lib include tools tutorial README*.md halide_config.*)
	rm -rf halide
	mv $(BUILD_DIR)/halide.tgz $(DISTRIB_DIR)/halide.tgz


# Convenience alias: 'make distrib' builds the distribution tarball.
.PHONY: distrib
distrib: $(DISTRIB_DIR)/halide.tgz
2186
# Build the HalideTraceViz utility. Only the .cpp prerequisites are passed
# to the compiler; the headers are listed so that editing them triggers a
# rebuild.
$(BIN_DIR)/HalideTraceViz: $(ROOT_DIR)/util/HalideTraceViz.cpp \
                           $(INCLUDE_DIR)/HalideRuntime.h \
                           $(ROOT_DIR)/tools/halide_image_io.h \
                           $(ROOT_DIR)/tools/halide_trace_config.h
	$(CXX) $(OPTIMIZE) -std=c++11 \
	    $(filter %.cpp,$^) \
	    -I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools \
	    -L$(BIN_DIR) \
	    -o $@
2189
# Build the HalideTraceDump utility from its two .cpp sources. The headers
# are prerequisites only (filtered out of the compile line), and the
# image I/O flags and libraries are added to the compile/link command.
$(BIN_DIR)/HalideTraceDump: $(ROOT_DIR)/util/HalideTraceDump.cpp \
                            $(ROOT_DIR)/util/HalideTraceUtils.cpp \
                            $(INCLUDE_DIR)/HalideRuntime.h \
                            $(ROOT_DIR)/tools/halide_image_io.h
	$(CXX) $(OPTIMIZE) -std=c++11 \
	    $(filter %.cpp,$^) \
	    -I$(INCLUDE_DIR) -I$(ROOT_DIR)/tools -I$(ROOT_DIR)/src/runtime \
	    -L$(BIN_DIR) \
	    $(IMAGE_IO_CXX_FLAGS) $(IMAGE_IO_LIBS) \
	    -o $@
2192
2193# Run clang-format on most of the source. The tutorials directory is
2194# explicitly skipped, as those files are manually formatted to
2195# maximize readability. NB: clang-format is *not* stable across versions;
2196# we are currently standardized on the formatting from clang-format-10.
2197# If CLANG_FORMAT points to a different version, you may get incorrectly-formatted code.
CLANG_FORMAT ?= ${CLANG}-format

# Reformat, in place (-i), every .cpp/.h/.c file found under the listed
# source trees, using -style=file.
#  - The -name patterns are quoted so the shell passes them to find
#    verbatim; unquoted, a matching file in the current directory would be
#    substituted for the pattern before find ever saw it.
#  - The patterns are grouped with \( \) so that -print0 applies to all
#    three alternatives.
#  - -print0 / xargs -0 keep paths containing whitespace intact.
.PHONY: format
format:
	find "${ROOT_DIR}/apps" "${ROOT_DIR}/src" "${ROOT_DIR}/tools" "${ROOT_DIR}/test" "${ROOT_DIR}/util" "${ROOT_DIR}/python_bindings" \
	     \( -name '*.cpp' -o -name '*.h' -o -name '*.c' \) -print0 \
	     | xargs -0 ${CLANG_FORMAT} -i -style=file
2203
# run-clang-tidy.py ships with LLVM and runs clang-tidy in parallel.
# Unless overridden, assume it lives at the standard install location
# relative to the clang binary.
RUN_CLANG_TIDY ?= $(shell dirname $(CLANG))/../share/clang/run-clang-tidy.py

# clang-tidy is run on everything in src/ only. The surface may grow in
# future; for now it is limited because things outside src are not
# performance-critical.
CLANG_TIDY_TARGETS = $(patsubst %,$(SRC_DIR)/%,$(SOURCE_FILES))

# Full clang-tidy command line shared by the clang-tidy targets: the
# compilation database is looked up in $(BUILD_DIR), and the helper
# binaries are the ones that sit next to $(CLANG).
INVOKE_CLANG_TIDY ?= $(RUN_CLANG_TIDY) \
    -p $(BUILD_DIR) \
    $(CLANG_TIDY_TARGETS) \
    -clang-tidy-binary $(CLANG)-tidy \
    -clang-apply-replacements-binary $(CLANG)-apply-replacements \
    -quiet
2214
# Generate a JSON compilation database with one entry per file in
# $(SOURCE_FILES), followed by a sentinel entry so that the array is valid
# JSON (no trailing comma after the last real entry).
#
# The whole file is produced by a single shell invocation: each recipe
# line otherwise runs in its own shell, so variables such as BD set on one
# line would be empty on a later one (which would leave the sentinel with
# an empty "directory"). The sentinel's "file" is /dev/null, matching the
# file named in its command.
#
# Output goes to $@.tmp and is moved into place only on success, so a
# failed or interrupted run never leaves behind a truncated database that
# looks up to date, and stale content is never appended to.
$(BUILD_DIR)/compile_commands.json:
	mkdir -p $(BUILD_DIR)
	set -e; \
	BD=$$(realpath $(BUILD_DIR)); \
	SD=$$(realpath $(SRC_DIR)); \
	{ \
	  echo '['; \
	  for S in $(SOURCE_FILES); do \
	    echo "{ \"directory\": \"$${BD}\","; \
	    echo "  \"command\": \"$(CXX) $(CXX_FLAGS) -c $$SD/$$S -o /dev/null\","; \
	    echo "  \"file\": \"$$SD/$$S\" },"; \
	  done; \
	  echo "{ \"directory\": \"$${BD}\","; \
	  echo "  \"command\": \"$(CXX) -c /dev/null -o /dev/null\","; \
	  echo "  \"file\": \"/dev/null\" }]"; \
	} > $@.tmp
	mv -f $@.tmp $@
2230
# Run clang-tidy over $(CLANG_TIDY_TARGETS) and print its output with the
# "warnings generated" lines and the echoed clang-tidy command lines
# filtered out.
.PHONY: clang-tidy
clang-tidy: $(BUILD_DIR)/compile_commands.json
	@$(INVOKE_CLANG_TIDY) 2>&1 \
	    | grep -v -e "warnings generated" -e '^$(CLANG)-tidy '
2234
# Same as the clang-tidy target, but passes -fix so that suggested fixes
# are applied. The output is filtered in the same way.
.PHONY: clang-tidy-fix
clang-tidy-fix: $(BUILD_DIR)/compile_commands.json
	@$(INVOKE_CLANG_TIDY) -fix 2>&1 \
	    | grep -v -e "warnings generated" -e '^$(CLANG)-tidy '
2238
2239# Build the documentation. Be sure to keep this synchronized with doc/CMakeLists.txt
2240# if you choose to edit it.
2241
# Alias ROOT_DIR as Halide_SOURCE_DIR so that the Doxyfile text below can
# refer to the source tree by that name, keeping it closer to the CMake
# version.
Halide_SOURCE_DIR=${ROOT_DIR}

# Doxygen configuration, held in a multi-line make variable. Everything
# between 'define' and 'endef' is the variable's value (the text that the
# doc target writes out as the Doxyfile), so Makefile commentary belongs
# out here rather than inside the definition. The definition references
# ${Halide_SOURCE_DIR} and ${DOC_DIR}.
define Doxyfile
# Keep the following in sync with doc/CMakeLists.txt
ALPHABETICAL_INDEX     = NO
BUILTIN_STL_SUPPORT    = YES
CASE_SENSE_NAMES       = NO
CLASS_DIAGRAMS         = NO
DISTRIBUTE_GROUP_DOC   = YES
EXAMPLE_PATH           = "${Halide_SOURCE_DIR}/tutorial"
EXCLUDE                = bin
EXTRACT_ALL            = YES
EXTRACT_LOCAL_CLASSES  = NO
FILE_PATTERNS          = *.h
GENERATE_TREEVIEW      = YES
HIDE_FRIEND_COMPOUNDS  = YES
HIDE_IN_BODY_DOCS      = YES
HIDE_UNDOC_CLASSES     = YES
HIDE_UNDOC_MEMBERS     = YES
JAVADOC_AUTOBRIEF      = YES
QT_AUTOBRIEF           = YES
QUIET                  = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION    = YES
SORT_BY_SCOPE_NAME     = YES
SORT_MEMBER_DOCS       = NO
SOURCE_BROWSER         = YES
STRIP_CODE_COMMENTS    = NO

# Makefile-specific options
GENERATE_LATEX         = NO
HAVE_DOT               = NO
HTML_OUTPUT            = .
INPUT                  = "${Halide_SOURCE_DIR}/src" "${Halide_SOURCE_DIR}/test"
OUTPUT_DIRECTORY       = ${DOC_DIR}
PROJECT_NAME           = Halide
endef

# Export the Doxyfile variable into the environment of recipe shells, so
# that the doc target can read it as the shell variable $Doxyfile.
export Doxyfile
2283
# Build the documentation: write the exported Doxyfile variable to
# $(TMP_DIR)/Doxyfile, make sure the output directory exists, and run
# doxygen on the generated configuration. Errors from the two mkdir
# commands are ignored (leading '-').
.PHONY: doc
doc:
	@-mkdir -p $(TMP_DIR)
	echo "$$Doxyfile" > $(TMP_DIR)/Doxyfile
	@-mkdir -p $(DOC_DIR)
	doxygen $(TMP_DIR)/Doxyfile
2290