1 #include "HalideRuntime.h"
2 #include "cpu_features.h"
3 
4 namespace Halide {
5 namespace Runtime {
6 namespace Internal {
7 
8 extern "C" void x86_cpuid_halide(int32_t *);
9 
10 namespace {
11 
cpuid(int32_t fn_id,int32_t * info)12 ALWAYS_INLINE void cpuid(int32_t fn_id, int32_t *info) {
13     info[0] = fn_id;
14     x86_cpuid_halide(info);
15 }
16 
17 }  // namespace
18 
halide_get_cpu_features()19 WEAK CpuFeatures halide_get_cpu_features() {
20     CpuFeatures features;
21     features.set_known(halide_target_feature_sse41);
22     features.set_known(halide_target_feature_avx);
23     features.set_known(halide_target_feature_f16c);
24     features.set_known(halide_target_feature_fma);
25     features.set_known(halide_target_feature_avx2);
26     features.set_known(halide_target_feature_avx512);
27     features.set_known(halide_target_feature_avx512_knl);
28     features.set_known(halide_target_feature_avx512_skylake);
29     features.set_known(halide_target_feature_avx512_cannonlake);
30 
31     int32_t info[4];
32     cpuid(1, info);
33 
34     const bool have_sse41 = (info[2] & (1 << 19)) != 0;
35     const bool have_avx = (info[2] & (1 << 28)) != 0;
36     const bool have_f16c = (info[2] & (1 << 29)) != 0;
37     const bool have_rdrand = (info[2] & (1 << 30)) != 0;
38     const bool have_fma = (info[2] & (1 << 12)) != 0;
39     if (have_sse41) {
40         features.set_available(halide_target_feature_sse41);
41     }
42     if (have_avx) {
43         features.set_available(halide_target_feature_avx);
44     }
45     if (have_f16c) {
46         features.set_available(halide_target_feature_f16c);
47     }
48     if (have_fma) {
49         features.set_available(halide_target_feature_fma);
50     }
51 
52     const bool use_64_bits = (sizeof(size_t) == 8);
53     if (use_64_bits && have_avx && have_f16c && have_rdrand) {
54         int info2[4];
55         cpuid(7, info2);
56         const uint32_t avx2 = 1U << 5;
57         const uint32_t avx512f = 1U << 16;
58         const uint32_t avx512dq = 1U << 17;
59         const uint32_t avx512pf = 1U << 26;
60         const uint32_t avx512er = 1U << 27;
61         const uint32_t avx512cd = 1U << 28;
62         const uint32_t avx512bw = 1U << 30;
63         const uint32_t avx512vl = 1U << 31;
64         const uint32_t avx512ifma = 1U << 21;
65         const uint32_t avx512 = avx512f | avx512cd;
66         const uint32_t avx512_knl = avx512 | avx512pf | avx512er;
67         const uint32_t avx512_skylake = avx512 | avx512vl | avx512bw | avx512dq;
68         const uint32_t avx512_cannonlake = avx512_skylake | avx512ifma;  // Assume ifma => vbmi
69         if ((info2[1] & avx2) == avx2) {
70             features.set_available(halide_target_feature_avx2);
71         }
72         if ((info2[1] & avx512) == avx512) {
73             features.set_available(halide_target_feature_avx512);
74             if ((info2[1] & avx512_knl) == avx512_knl) {
75                 features.set_available(halide_target_feature_avx512_knl);
76             }
77             if ((info2[1] & avx512_skylake) == avx512_skylake) {
78                 features.set_available(halide_target_feature_avx512_skylake);
79             }
80             if ((info2[1] & avx512_cannonlake) == avx512_cannonlake) {
81                 features.set_available(halide_target_feature_avx512_cannonlake);
82             }
83         }
84     }
85     return features;
86 }
87 
88 }  // namespace Internal
89 }  // namespace Runtime
90 }  // namespace Halide
91