1 #include "HalideRuntime.h" 2 #include "cpu_features.h" 3 4 namespace Halide { 5 namespace Runtime { 6 namespace Internal { 7 8 extern "C" void x86_cpuid_halide(int32_t *); 9 10 namespace { 11 cpuid(int32_t fn_id,int32_t * info)12ALWAYS_INLINE void cpuid(int32_t fn_id, int32_t *info) { 13 info[0] = fn_id; 14 x86_cpuid_halide(info); 15 } 16 17 } // namespace 18 halide_get_cpu_features()19WEAK CpuFeatures halide_get_cpu_features() { 20 CpuFeatures features; 21 features.set_known(halide_target_feature_sse41); 22 features.set_known(halide_target_feature_avx); 23 features.set_known(halide_target_feature_f16c); 24 features.set_known(halide_target_feature_fma); 25 features.set_known(halide_target_feature_avx2); 26 features.set_known(halide_target_feature_avx512); 27 features.set_known(halide_target_feature_avx512_knl); 28 features.set_known(halide_target_feature_avx512_skylake); 29 features.set_known(halide_target_feature_avx512_cannonlake); 30 31 int32_t info[4]; 32 cpuid(1, info); 33 34 const bool have_sse41 = (info[2] & (1 << 19)) != 0; 35 const bool have_avx = (info[2] & (1 << 28)) != 0; 36 const bool have_f16c = (info[2] & (1 << 29)) != 0; 37 const bool have_rdrand = (info[2] & (1 << 30)) != 0; 38 const bool have_fma = (info[2] & (1 << 12)) != 0; 39 if (have_sse41) { 40 features.set_available(halide_target_feature_sse41); 41 } 42 if (have_avx) { 43 features.set_available(halide_target_feature_avx); 44 } 45 if (have_f16c) { 46 features.set_available(halide_target_feature_f16c); 47 } 48 if (have_fma) { 49 features.set_available(halide_target_feature_fma); 50 } 51 52 const bool use_64_bits = (sizeof(size_t) == 8); 53 if (use_64_bits && have_avx && have_f16c && have_rdrand) { 54 int info2[4]; 55 cpuid(7, info2); 56 const uint32_t avx2 = 1U << 5; 57 const uint32_t avx512f = 1U << 16; 58 const uint32_t avx512dq = 1U << 17; 59 const uint32_t avx512pf = 1U << 26; 60 const uint32_t avx512er = 1U << 27; 61 const uint32_t avx512cd = 1U << 28; 62 const uint32_t avx512bw = 1U << 30; 63 const uint32_t avx512vl = 1U << 31; 64 const uint32_t avx512ifma = 1U << 21; 65 const uint32_t avx512 = avx512f | avx512cd; 66 const uint32_t avx512_knl = avx512 | avx512pf | avx512er; 67 const uint32_t avx512_skylake = avx512 | avx512vl | avx512bw | avx512dq; 68 const uint32_t avx512_cannonlake = avx512_skylake | avx512ifma; // Assume ifma => vbmi 69 if ((info2[1] & avx2) == avx2) { 70 features.set_available(halide_target_feature_avx2); 71 } 72 if ((info2[1] & avx512) == avx512) { 73 features.set_available(halide_target_feature_avx512); 74 if ((info2[1] & avx512_knl) == avx512_knl) { 75 features.set_available(halide_target_feature_avx512_knl); 76 } 77 if ((info2[1] & avx512_skylake) == avx512_skylake) { 78 features.set_available(halide_target_feature_avx512_skylake); 79 } 80 if ((info2[1] & avx512_cannonlake) == avx512_cannonlake) { 81 features.set_available(halide_target_feature_avx512_cannonlake); 82 } 83 } 84 } 85 return features; 86 } 87 88 } // namespace Internal 89 } // namespace Runtime 90 } // namespace Halide 91