1 #ifndef HALIDE_TARGET_H 2 #define HALIDE_TARGET_H 3 4 /** \file 5 * Defines the structure that describes a Halide target. 6 */ 7 8 #include <bitset> 9 #include <stdint.h> 10 #include <string> 11 12 #include "DeviceAPI.h" 13 #include "Type.h" 14 #include "runtime/HalideRuntime.h" 15 16 namespace Halide { 17 18 /** A struct representing a target machine and os to generate code for. */ 19 struct Target { 20 /** The operating system used by the target. Determines which 21 * system calls to generate. 22 * Corresponds to os_name_map in Target.cpp. */ 23 enum OS { 24 OSUnknown = 0, 25 Linux, 26 Windows, 27 OSX, 28 Android, 29 IOS, 30 QuRT, 31 NoOS, 32 Fuchsia, 33 WebAssemblyRuntime 34 } os; 35 36 /** The architecture used by the target. Determines the 37 * instruction set to use. 38 * Corresponds to arch_name_map in Target.cpp. */ 39 enum Arch { 40 ArchUnknown = 0, 41 X86, 42 ARM, 43 MIPS, 44 Hexagon, 45 POWERPC, 46 WebAssembly, 47 RISCV 48 } arch; 49 50 /** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */ 51 int bits; 52 53 /** Optional features a target can have. 54 * Corresponds to feature_name_map in Target.cpp. 55 * See definitions in HalideRuntime.h for full information. 56 */ 57 enum Feature { 58 JIT = halide_target_feature_jit, 59 Debug = halide_target_feature_debug, 60 NoAsserts = halide_target_feature_no_asserts, 61 NoBoundsQuery = halide_target_feature_no_bounds_query, 62 SSE41 = halide_target_feature_sse41, 63 AVX = halide_target_feature_avx, 64 AVX2 = halide_target_feature_avx2, 65 FMA = halide_target_feature_fma, 66 FMA4 = halide_target_feature_fma4, 67 F16C = halide_target_feature_f16c, 68 ARMv7s = halide_target_feature_armv7s, 69 NoNEON = halide_target_feature_no_neon, 70 VSX = halide_target_feature_vsx, 71 POWER_ARCH_2_07 = halide_target_feature_power_arch_2_07, 72 CUDA = halide_target_feature_cuda, 73 CUDACapability30 = halide_target_feature_cuda_capability30, 74 CUDACapability32 = halide_target_feature_cuda_capability32, 75 CUDACapability35 = halide_target_feature_cuda_capability35, 76 CUDACapability50 = halide_target_feature_cuda_capability50, 77 CUDACapability61 = halide_target_feature_cuda_capability61, 78 CUDACapability70 = halide_target_feature_cuda_capability70, 79 CUDACapability75 = halide_target_feature_cuda_capability75, 80 CUDACapability80 = halide_target_feature_cuda_capability80, 81 OpenCL = halide_target_feature_opencl, 82 CLDoubles = halide_target_feature_cl_doubles, 83 CLHalf = halide_target_feature_cl_half, 84 CLAtomics64 = halide_target_feature_cl_atomic64, 85 OpenGL = halide_target_feature_opengl, 86 OpenGLCompute = halide_target_feature_openglcompute, 87 EGL = halide_target_feature_egl, 88 UserContext = halide_target_feature_user_context, 89 Matlab = halide_target_feature_matlab, 90 Profile = halide_target_feature_profile, 91 NoRuntime = halide_target_feature_no_runtime, 92 Metal = halide_target_feature_metal, 93 CPlusPlusMangling = halide_target_feature_c_plus_plus_mangling, 94 LargeBuffers = halide_target_feature_large_buffers, 95 HexagonDma = halide_target_feature_hexagon_dma, 96 HVX_64 = halide_target_feature_hvx_64, 97 HVX_128 = halide_target_feature_hvx_128, 98 HVX_v62 = halide_target_feature_hvx_v62, 99 HVX_v65 = halide_target_feature_hvx_v65, 100 HVX_v66 = halide_target_feature_hvx_v66, 101 HVX_shared_object = halide_target_feature_hvx_use_shared_object, 102 FuzzFloatStores = halide_target_feature_fuzz_float_stores, 103 SoftFloatABI = halide_target_feature_soft_float_abi, 104 MSAN = halide_target_feature_msan, 105 AVX512 = halide_target_feature_avx512, 106 AVX512_KNL = halide_target_feature_avx512_knl, 107 AVX512_Skylake = halide_target_feature_avx512_skylake, 108 AVX512_Cannonlake = halide_target_feature_avx512_cannonlake, 109 TraceLoads = halide_target_feature_trace_loads, 110 TraceStores = halide_target_feature_trace_stores, 111 TraceRealizations = halide_target_feature_trace_realizations, 112 TracePipeline = halide_target_feature_trace_pipeline, 113 D3D12Compute = halide_target_feature_d3d12compute, 114 StrictFloat = halide_target_feature_strict_float, 115 TSAN = halide_target_feature_tsan, 116 ASAN = halide_target_feature_asan, 117 CheckUnsafePromises = halide_target_feature_check_unsafe_promises, 118 EmbedBitcode = halide_target_feature_embed_bitcode, 119 EnableLLVMLoopOpt = halide_target_feature_enable_llvm_loop_opt, 120 DisableLLVMLoopOpt = halide_target_feature_disable_llvm_loop_opt, 121 WasmSimd128 = halide_target_feature_wasm_simd128, 122 WasmSignExt = halide_target_feature_wasm_signext, 123 WasmSatFloatToInt = halide_target_feature_wasm_sat_float_to_int, 124 SVE = halide_target_feature_sve, 125 SVE2 = halide_target_feature_sve2, 126 ARMDotProd = halide_target_feature_arm_dot_prod, 127 FeatureEnd = halide_target_feature_end 128 }; TargetTarget129 Target() 130 : os(OSUnknown), arch(ArchUnknown), bits(0) { 131 } 132 Target(OS o, Arch a, int b, const std::vector<Feature> &initial_features = std::vector<Feature>()) osTarget133 : os(o), arch(a), bits(b) { 134 for (const auto &f : initial_features) { 135 set_feature(f); 136 } 137 } 138 139 /** Given a string of the form used in HL_TARGET 140 * (e.g. "x86-64-avx"), construct the Target it specifies. Note 141 * that this always starts with the result of get_host_target(), 142 * replacing only the parts found in the target string, so if you 143 * omit (say) an OS specification, the host OS will be used 144 * instead. An empty string is exactly equivalent to 145 * get_host_target(). 146 * 147 * Invalid target strings will fail with a user_error. 148 */ 149 // @{ 150 explicit Target(const std::string &s); 151 explicit Target(const char *s); 152 // @} 153 154 /** Check if a target string is valid. */ 155 static bool validate_target_string(const std::string &s); 156 157 /** Return true if any of the arch/bits/os fields are "unknown"/0; 158 return false otherwise. */ 159 bool has_unknowns() const; 160 161 void set_feature(Feature f, bool value = true); 162 163 void set_features(const std::vector<Feature> &features_to_set, bool value = true); 164 165 bool has_feature(Feature f) const; 166 has_featureTarget167 inline bool has_feature(halide_target_feature_t f) const { 168 return has_feature((Feature)f); 169 } 170 171 bool features_any_of(const std::vector<Feature> &test_features) const; 172 173 bool features_all_of(const std::vector<Feature> &test_features) const; 174 175 /** Return a copy of the target with the given feature set. 176 * This is convenient when enabling certain features (e.g. NoBoundsQuery) 177 * in an initialization list, where the target to be mutated may be 178 * a const reference. */ 179 Target with_feature(Feature f) const; 180 181 /** Return a copy of the target with the given feature cleared. 182 * This is convenient when disabling certain features (e.g. NoBoundsQuery) 183 * in an initialization list, where the target to be mutated may be 184 * a const reference. */ 185 Target without_feature(Feature f) const; 186 187 /** Is a fully feature GPU compute runtime enabled? I.e. is 188 * Func::gpu_tile and similar going to work? Currently includes 189 * CUDA, OpenCL, Metal and D3D12Compute. We do not include OpenGL, 190 * because it is not capable of gpgpu, and is not scheduled via 191 * Func::gpu_tile. 192 * TODO: Should OpenGLCompute be included here? */ 193 bool has_gpu_feature() const; 194 195 /** Does this target allow using a certain type. Generally all 196 * types except 64-bit float and int/uint should be supported by 197 * all backends. 198 * 199 * It is likely better to call the version below which takes a DeviceAPI. 200 */ 201 bool supports_type(const Type &t) const; 202 203 /** Does this target allow using a certain type on a certain device. 204 * This is the prefered version of this routine. 205 */ 206 bool supports_type(const Type &t, DeviceAPI device) const; 207 208 /** Returns whether a particular device API can be used with this 209 * Target. */ 210 bool supports_device_api(DeviceAPI api) const; 211 212 /** If this Target (including all Features) requires a specific DeviceAPI, 213 * return it. If it doesn't, return DeviceAPI::None. If the Target has 214 * features with multiple (different) DeviceAPI requirements, the result 215 * will be an arbitrary DeviceAPI. */ 216 DeviceAPI get_required_device_api() const; 217 218 bool operator==(const Target &other) const { 219 return os == other.os && 220 arch == other.arch && 221 bits == other.bits && 222 features == other.features; 223 } 224 225 bool operator!=(const Target &other) const { 226 return !(*this == other); 227 } 228 229 /** 230 * Create a "greatest common denominator" runtime target that is compatible with 231 * both this target and \p other. Used by generators to conveniently select a suitable 232 * runtime when linking together multiple functions. 233 * 234 * @param other The other target from which we compute the gcd target. 235 * @param[out] result The gcd target if we return true, otherwise unmodified. Can be the same as *this. 236 * @return Whether it was possible to find a compatible target (true) or not. 237 */ 238 bool get_runtime_compatible_target(const Target &other, Target &result); 239 240 /** Convert the Target into a string form that can be reconstituted 241 * by merge_string(), which will always be of the form 242 * 243 * arch-bits-os-feature1-feature2...featureN. 244 * 245 * Note that is guaranteed that Target(t1.to_string()) == t1, 246 * but not that Target(s).to_string() == s (since there can be 247 * multiple strings that parse to the same Target)... 248 * *unless* t1 contains 'unknown' fields (in which case you'll get a string 249 * that can't be parsed, which is intentional). 250 */ 251 std::string to_string() const; 252 253 /** Given a data type, return an estimate of the "natural" vector size 254 * for that data type when compiling for this Target. */ 255 int natural_vector_size(const Halide::Type &t) const; 256 257 /** Given a data type, return an estimate of the "natural" vector size 258 * for that data type when compiling for this Target. */ 259 template<typename data_t> natural_vector_sizeTarget260 int natural_vector_size() const { 261 return natural_vector_size(type_of<data_t>()); 262 } 263 264 /** Return true iff 64 bits and has_feature(LargeBuffers). */ has_large_buffersTarget265 bool has_large_buffers() const { 266 return bits == 64 && has_feature(LargeBuffers); 267 } 268 269 /** Return the maximum buffer size in bytes supported on this 270 * Target. This is 2^31 - 1 except on 64-bit targets when the LargeBuffers 271 * feature is enabled, which expands the maximum to 2^63 - 1. */ maximum_buffer_sizeTarget272 int64_t maximum_buffer_size() const { 273 if (has_large_buffers()) { 274 return (((uint64_t)1) << 63) - 1; 275 } else { 276 return (((uint64_t)1) << 31) - 1; 277 } 278 } 279 280 /** Get the minimum cuda capability found as an integer. Returns 281 * 20 (our minimum supported cuda compute capability) if no cuda 282 * features are set. */ 283 int get_cuda_capability_lower_bound() const; 284 285 /** Was libHalide compiled with support for this target? */ 286 bool supported() const; 287 288 /** Return a bitset of the Featuress set in this Target (set = 1). 289 * Note that while this happens to be the current internal representation, 290 * that might not always be the case. */ get_features_bitsetTarget291 const std::bitset<FeatureEnd> &get_features_bitset() const { 292 return features; 293 } 294 295 /** Return the name corresponding to a given Feature, in the form 296 * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug"). */ 297 static std::string feature_to_name(Target::Feature feature); 298 299 /** Return the feature corresponding to a given name, in the form 300 * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug"). 301 * If the string is not a known feature name, return FeatureEnd. */ 302 static Target::Feature feature_from_name(const std::string &name); 303 304 private: 305 /** A bitmask that stores the active features. */ 306 std::bitset<FeatureEnd> features; 307 }; 308 309 /** Return the target corresponding to the host machine. */ 310 Target get_host_target(); 311 312 /** Return the target that Halide will use. If HL_TARGET is set it 313 * uses that. Otherwise calls \ref get_host_target */ 314 Target get_target_from_environment(); 315 316 /** Return the target that Halide will use for jit-compilation. If 317 * HL_JIT_TARGET is set it uses that. Otherwise calls \ref 318 * get_host_target. Throws an error if the architecture, bit width, 319 * and OS of the target do not match the host target, so this is only 320 * useful for controlling the feature set. */ 321 Target get_jit_target_from_environment(); 322 323 /** Get the Target feature corresponding to a DeviceAPI. For device 324 * apis that do not correspond to any single target feature, returns 325 * Target::FeatureEnd */ 326 Target::Feature target_feature_for_device_api(DeviceAPI api); 327 328 namespace Internal { 329 330 void target_test(); 331 } 332 333 } // namespace Halide 334 335 #endif 336