1 #ifndef HALIDE_TARGET_H
2 #define HALIDE_TARGET_H
3 
4 /** \file
5  * Defines the structure that describes a Halide target.
6  */
7 
8 #include <bitset>
9 #include <stdint.h>
10 #include <string>
11 
12 #include "DeviceAPI.h"
13 #include "Type.h"
14 #include "runtime/HalideRuntime.h"
15 
16 namespace Halide {
17 
18 /** A struct representing a target machine and os to generate code for. */
19 struct Target {
20     /** The operating system used by the target. Determines which
21      * system calls to generate.
22      * Corresponds to os_name_map in Target.cpp. */
23     enum OS {
24         OSUnknown = 0,
25         Linux,
26         Windows,
27         OSX,
28         Android,
29         IOS,
30         QuRT,
31         NoOS,
32         Fuchsia,
33         WebAssemblyRuntime
34     } os;
35 
36     /** The architecture used by the target. Determines the
37      * instruction set to use.
38      * Corresponds to arch_name_map in Target.cpp. */
39     enum Arch {
40         ArchUnknown = 0,
41         X86,
42         ARM,
43         MIPS,
44         Hexagon,
45         POWERPC,
46         WebAssembly,
47         RISCV
48     } arch;
49 
50     /** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */
51     int bits;
52 
53     /** Optional features a target can have.
54      * Corresponds to feature_name_map in Target.cpp.
55      * See definitions in HalideRuntime.h for full information.
56      */
57     enum Feature {
58         JIT = halide_target_feature_jit,
59         Debug = halide_target_feature_debug,
60         NoAsserts = halide_target_feature_no_asserts,
61         NoBoundsQuery = halide_target_feature_no_bounds_query,
62         SSE41 = halide_target_feature_sse41,
63         AVX = halide_target_feature_avx,
64         AVX2 = halide_target_feature_avx2,
65         FMA = halide_target_feature_fma,
66         FMA4 = halide_target_feature_fma4,
67         F16C = halide_target_feature_f16c,
68         ARMv7s = halide_target_feature_armv7s,
69         NoNEON = halide_target_feature_no_neon,
70         VSX = halide_target_feature_vsx,
71         POWER_ARCH_2_07 = halide_target_feature_power_arch_2_07,
72         CUDA = halide_target_feature_cuda,
73         CUDACapability30 = halide_target_feature_cuda_capability30,
74         CUDACapability32 = halide_target_feature_cuda_capability32,
75         CUDACapability35 = halide_target_feature_cuda_capability35,
76         CUDACapability50 = halide_target_feature_cuda_capability50,
77         CUDACapability61 = halide_target_feature_cuda_capability61,
78         CUDACapability70 = halide_target_feature_cuda_capability70,
79         CUDACapability75 = halide_target_feature_cuda_capability75,
80         CUDACapability80 = halide_target_feature_cuda_capability80,
81         OpenCL = halide_target_feature_opencl,
82         CLDoubles = halide_target_feature_cl_doubles,
83         CLHalf = halide_target_feature_cl_half,
84         CLAtomics64 = halide_target_feature_cl_atomic64,
85         OpenGL = halide_target_feature_opengl,
86         OpenGLCompute = halide_target_feature_openglcompute,
87         EGL = halide_target_feature_egl,
88         UserContext = halide_target_feature_user_context,
89         Matlab = halide_target_feature_matlab,
90         Profile = halide_target_feature_profile,
91         NoRuntime = halide_target_feature_no_runtime,
92         Metal = halide_target_feature_metal,
93         CPlusPlusMangling = halide_target_feature_c_plus_plus_mangling,
94         LargeBuffers = halide_target_feature_large_buffers,
95         HexagonDma = halide_target_feature_hexagon_dma,
96         HVX_64 = halide_target_feature_hvx_64,
97         HVX_128 = halide_target_feature_hvx_128,
98         HVX_v62 = halide_target_feature_hvx_v62,
99         HVX_v65 = halide_target_feature_hvx_v65,
100         HVX_v66 = halide_target_feature_hvx_v66,
101         HVX_shared_object = halide_target_feature_hvx_use_shared_object,
102         FuzzFloatStores = halide_target_feature_fuzz_float_stores,
103         SoftFloatABI = halide_target_feature_soft_float_abi,
104         MSAN = halide_target_feature_msan,
105         AVX512 = halide_target_feature_avx512,
106         AVX512_KNL = halide_target_feature_avx512_knl,
107         AVX512_Skylake = halide_target_feature_avx512_skylake,
108         AVX512_Cannonlake = halide_target_feature_avx512_cannonlake,
109         TraceLoads = halide_target_feature_trace_loads,
110         TraceStores = halide_target_feature_trace_stores,
111         TraceRealizations = halide_target_feature_trace_realizations,
112         TracePipeline = halide_target_feature_trace_pipeline,
113         D3D12Compute = halide_target_feature_d3d12compute,
114         StrictFloat = halide_target_feature_strict_float,
115         TSAN = halide_target_feature_tsan,
116         ASAN = halide_target_feature_asan,
117         CheckUnsafePromises = halide_target_feature_check_unsafe_promises,
118         EmbedBitcode = halide_target_feature_embed_bitcode,
119         EnableLLVMLoopOpt = halide_target_feature_enable_llvm_loop_opt,
120         DisableLLVMLoopOpt = halide_target_feature_disable_llvm_loop_opt,
121         WasmSimd128 = halide_target_feature_wasm_simd128,
122         WasmSignExt = halide_target_feature_wasm_signext,
123         WasmSatFloatToInt = halide_target_feature_wasm_sat_float_to_int,
124         SVE = halide_target_feature_sve,
125         SVE2 = halide_target_feature_sve2,
126         ARMDotProd = halide_target_feature_arm_dot_prod,
127         FeatureEnd = halide_target_feature_end
128     };
TargetTarget129     Target()
130         : os(OSUnknown), arch(ArchUnknown), bits(0) {
131     }
132     Target(OS o, Arch a, int b, const std::vector<Feature> &initial_features = std::vector<Feature>())
osTarget133         : os(o), arch(a), bits(b) {
134         for (const auto &f : initial_features) {
135             set_feature(f);
136         }
137     }
138 
139     /** Given a string of the form used in HL_TARGET
140      * (e.g. "x86-64-avx"), construct the Target it specifies. Note
141      * that this always starts with the result of get_host_target(),
142      * replacing only the parts found in the target string, so if you
143      * omit (say) an OS specification, the host OS will be used
144      * instead. An empty string is exactly equivalent to
145      * get_host_target().
146      *
147      * Invalid target strings will fail with a user_error.
148      */
149     // @{
150     explicit Target(const std::string &s);
151     explicit Target(const char *s);
152     // @}
153 
154     /** Check if a target string is valid. */
155     static bool validate_target_string(const std::string &s);
156 
157     /** Return true if any of the arch/bits/os fields are "unknown"/0;
158         return false otherwise. */
159     bool has_unknowns() const;
160 
161     void set_feature(Feature f, bool value = true);
162 
163     void set_features(const std::vector<Feature> &features_to_set, bool value = true);
164 
165     bool has_feature(Feature f) const;
166 
has_featureTarget167     inline bool has_feature(halide_target_feature_t f) const {
168         return has_feature((Feature)f);
169     }
170 
171     bool features_any_of(const std::vector<Feature> &test_features) const;
172 
173     bool features_all_of(const std::vector<Feature> &test_features) const;
174 
175     /** Return a copy of the target with the given feature set.
176      * This is convenient when enabling certain features (e.g. NoBoundsQuery)
177      * in an initialization list, where the target to be mutated may be
178      * a const reference. */
179     Target with_feature(Feature f) const;
180 
181     /** Return a copy of the target with the given feature cleared.
182      * This is convenient when disabling certain features (e.g. NoBoundsQuery)
183      * in an initialization list, where the target to be mutated may be
184      * a const reference. */
185     Target without_feature(Feature f) const;
186 
187     /** Is a fully feature GPU compute runtime enabled? I.e. is
188      * Func::gpu_tile and similar going to work? Currently includes
189      * CUDA, OpenCL, Metal and D3D12Compute. We do not include OpenGL,
190      * because it is not capable of gpgpu, and is not scheduled via
191      * Func::gpu_tile.
192      * TODO: Should OpenGLCompute be included here? */
193     bool has_gpu_feature() const;
194 
195     /** Does this target allow using a certain type. Generally all
196      * types except 64-bit float and int/uint should be supported by
197      * all backends.
198      *
199      * It is likely better to call the version below which takes a DeviceAPI.
200      */
201     bool supports_type(const Type &t) const;
202 
203     /** Does this target allow using a certain type on a certain device.
204      * This is the prefered version of this routine.
205      */
206     bool supports_type(const Type &t, DeviceAPI device) const;
207 
208     /** Returns whether a particular device API can be used with this
209      * Target. */
210     bool supports_device_api(DeviceAPI api) const;
211 
212     /** If this Target (including all Features) requires a specific DeviceAPI,
213      * return it. If it doesn't, return DeviceAPI::None.  If the Target has
214      * features with multiple (different) DeviceAPI requirements, the result
215      * will be an arbitrary DeviceAPI. */
216     DeviceAPI get_required_device_api() const;
217 
218     bool operator==(const Target &other) const {
219         return os == other.os &&
220                arch == other.arch &&
221                bits == other.bits &&
222                features == other.features;
223     }
224 
225     bool operator!=(const Target &other) const {
226         return !(*this == other);
227     }
228 
229     /**
230      * Create a "greatest common denominator" runtime target that is compatible with
231      * both this target and \p other. Used by generators to conveniently select a suitable
232      * runtime when linking together multiple functions.
233      *
234      * @param other The other target from which we compute the gcd target.
235      * @param[out] result The gcd target if we return true, otherwise unmodified. Can be the same as *this.
236      * @return Whether it was possible to find a compatible target (true) or not.
237      */
238     bool get_runtime_compatible_target(const Target &other, Target &result);
239 
240     /** Convert the Target into a string form that can be reconstituted
241      * by merge_string(), which will always be of the form
242      *
243      *   arch-bits-os-feature1-feature2...featureN.
244      *
245      * Note that is guaranteed that Target(t1.to_string()) == t1,
246      * but not that Target(s).to_string() == s (since there can be
247      * multiple strings that parse to the same Target)...
248      * *unless* t1 contains 'unknown' fields (in which case you'll get a string
249      * that can't be parsed, which is intentional).
250      */
251     std::string to_string() const;
252 
253     /** Given a data type, return an estimate of the "natural" vector size
254      * for that data type when compiling for this Target. */
255     int natural_vector_size(const Halide::Type &t) const;
256 
257     /** Given a data type, return an estimate of the "natural" vector size
258      * for that data type when compiling for this Target. */
259     template<typename data_t>
natural_vector_sizeTarget260     int natural_vector_size() const {
261         return natural_vector_size(type_of<data_t>());
262     }
263 
264     /** Return true iff 64 bits and has_feature(LargeBuffers). */
has_large_buffersTarget265     bool has_large_buffers() const {
266         return bits == 64 && has_feature(LargeBuffers);
267     }
268 
269     /** Return the maximum buffer size in bytes supported on this
270      * Target. This is 2^31 - 1 except on 64-bit targets when the LargeBuffers
271      * feature is enabled, which expands the maximum to 2^63 - 1. */
maximum_buffer_sizeTarget272     int64_t maximum_buffer_size() const {
273         if (has_large_buffers()) {
274             return (((uint64_t)1) << 63) - 1;
275         } else {
276             return (((uint64_t)1) << 31) - 1;
277         }
278     }
279 
280     /** Get the minimum cuda capability found as an integer. Returns
281      * 20 (our minimum supported cuda compute capability) if no cuda
282      * features are set. */
283     int get_cuda_capability_lower_bound() const;
284 
285     /** Was libHalide compiled with support for this target? */
286     bool supported() const;
287 
288     /** Return a bitset of the Featuress set in this Target (set = 1).
289      * Note that while this happens to be the current internal representation,
290      * that might not always be the case. */
get_features_bitsetTarget291     const std::bitset<FeatureEnd> &get_features_bitset() const {
292         return features;
293     }
294 
295     /** Return the name corresponding to a given Feature, in the form
296      * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug"). */
297     static std::string feature_to_name(Target::Feature feature);
298 
299     /** Return the feature corresponding to a given name, in the form
300      * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").
301      * If the string is not a known feature name, return FeatureEnd. */
302     static Target::Feature feature_from_name(const std::string &name);
303 
304 private:
305     /** A bitmask that stores the active features. */
306     std::bitset<FeatureEnd> features;
307 };
308 
309 /** Return the target corresponding to the host machine. */
310 Target get_host_target();
311 
312 /** Return the target that Halide will use. If HL_TARGET is set it
313  * uses that. Otherwise calls \ref get_host_target */
314 Target get_target_from_environment();
315 
316 /** Return the target that Halide will use for jit-compilation. If
317  * HL_JIT_TARGET is set it uses that. Otherwise calls \ref
318  * get_host_target. Throws an error if the architecture, bit width,
319  * and OS of the target do not match the host target, so this is only
320  * useful for controlling the feature set. */
321 Target get_jit_target_from_environment();
322 
323 /** Get the Target feature corresponding to a DeviceAPI. For device
324  * apis that do not correspond to any single target feature, returns
325  * Target::FeatureEnd */
326 Target::Feature target_feature_for_device_api(DeviceAPI api);
327 
328 namespace Internal {
329 
330 void target_test();
331 }
332 
333 }  // namespace Halide
334 
335 #endif
336