1 /*
2 * Runtime CPU detection for x86
3 * (C) 2009,2010,2013,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/cpuid.h>
9 #include <botan/mem_ops.h>
10 #include <botan/loadstor.h>
11 
12 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13 
14 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15   #include <intrin.h>
16 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17   #include <ia32intrin.h>
18 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19   #include <cpuid.h>
20 #endif
21 
22 #endif
23 
24 namespace Botan {
25 
26 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27 
detect_cpu_features(size_t * cache_line_size)28 uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
29    {
30 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
31   #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
32   #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
33 
34 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35   #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
36   #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
37 
38 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39   #define X86_CPUID(type, out)                                                    \
40      asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
41          : "0" (type))
42 
43   #define X86_CPUID_SUBLEVEL(type, level, out)                                    \
44      asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
45          : "0" (type), "2" (level))
46 
47 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
48   #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
49 
50   #define X86_CPUID_SUBLEVEL(type, level, out) \
51      do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
52 #else
53   #warning "No way of calling x86 cpuid instruction for this compiler"
54   #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
55   #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
56 #endif
57 
58    uint64_t features_detected = 0;
59    uint32_t cpuid[4] = { 0 };
60 
61    // CPUID 0: vendor identification, max sublevel
62    X86_CPUID(0, cpuid);
63 
64    const uint32_t max_supported_sublevel = cpuid[0];
65 
66    const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
67    const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
68    const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
69    const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
70 
71    if(max_supported_sublevel >= 1)
72       {
73       // CPUID 1: feature bits
74       X86_CPUID(1, cpuid);
75       const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
76 
77       enum x86_CPUID_1_bits : uint64_t {
78          RDTSC = (1ULL << 4),
79          SSE2 = (1ULL << 26),
80          CLMUL = (1ULL << 33),
81          SSSE3 = (1ULL << 41),
82          SSE41 = (1ULL << 51),
83          SSE42 = (1ULL << 52),
84          AESNI = (1ULL << 57),
85          RDRAND = (1ULL << 62)
86       };
87 
88       if(flags0 & x86_CPUID_1_bits::RDTSC)
89          features_detected |= CPUID::CPUID_RDTSC_BIT;
90       if(flags0 & x86_CPUID_1_bits::SSE2)
91          features_detected |= CPUID::CPUID_SSE2_BIT;
92       if(flags0 & x86_CPUID_1_bits::CLMUL)
93          features_detected |= CPUID::CPUID_CLMUL_BIT;
94       if(flags0 & x86_CPUID_1_bits::SSSE3)
95          features_detected |= CPUID::CPUID_SSSE3_BIT;
96       if(flags0 & x86_CPUID_1_bits::SSE41)
97          features_detected |= CPUID::CPUID_SSE41_BIT;
98       if(flags0 & x86_CPUID_1_bits::SSE42)
99          features_detected |= CPUID::CPUID_SSE42_BIT;
100       if(flags0 & x86_CPUID_1_bits::AESNI)
101          features_detected |= CPUID::CPUID_AESNI_BIT;
102       if(flags0 & x86_CPUID_1_bits::RDRAND)
103          features_detected |= CPUID::CPUID_RDRAND_BIT;
104       }
105 
106    if(is_intel)
107       {
108       // Intel cache line size is in cpuid(1) output
109       *cache_line_size = 8 * get_byte(2, cpuid[1]);
110       }
111    else if(is_amd)
112       {
113       // AMD puts it in vendor zone
114       X86_CPUID(0x80000005, cpuid);
115       *cache_line_size = get_byte(3, cpuid[2]);
116       }
117 
118    if(max_supported_sublevel >= 7)
119       {
120       clear_mem(cpuid, 4);
121       X86_CPUID_SUBLEVEL(7, 0, cpuid);
122 
123       enum x86_CPUID_7_bits : uint64_t {
124          BMI1 = (1ULL << 3),
125          AVX2 = (1ULL << 5),
126          BMI2 = (1ULL << 8),
127          AVX512_F = (1ULL << 16),
128          AVX512_DQ = (1ULL << 17),
129          RDSEED = (1ULL << 18),
130          ADX = (1ULL << 19),
131          AVX512_IFMA = (1ULL << 21),
132          SHA = (1ULL << 29),
133          AVX512_BW = (1ULL << 30),
134          AVX512_VL = (1ULL << 31),
135          AVX512_VBMI = (1ULL << 33),
136          AVX512_VBMI2 = (1ULL << 38),
137          AVX512_VAES = (1ULL << 41),
138          AVX512_VCLMUL = (1ULL << 42),
139          AVX512_VBITALG = (1ULL << 44),
140       };
141 
142       const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
143 
144       if(flags7 & x86_CPUID_7_bits::AVX2)
145          features_detected |= CPUID::CPUID_AVX2_BIT;
146       if(flags7 & x86_CPUID_7_bits::BMI1)
147          {
148          features_detected |= CPUID::CPUID_BMI1_BIT;
149          /*
150          We only set the BMI2 bit if BMI1 is also supported, so BMI2
151          code can safely use both extensions. No known processor
152          implements BMI2 but not BMI1.
153          */
154          if(flags7 & x86_CPUID_7_bits::BMI2)
155             features_detected |= CPUID::CPUID_BMI2_BIT;
156          }
157 
158       if(flags7 & x86_CPUID_7_bits::AVX512_F)
159          {
160          features_detected |= CPUID::CPUID_AVX512F_BIT;
161 
162          if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
163             features_detected |= CPUID::CPUID_AVX512DQ_BIT;
164          if(flags7 & x86_CPUID_7_bits::AVX512_BW)
165             features_detected |= CPUID::CPUID_AVX512BW_BIT;
166 
167          const uint64_t ICELAKE_FLAGS =
168             x86_CPUID_7_bits::AVX512_F |
169             x86_CPUID_7_bits::AVX512_DQ |
170             x86_CPUID_7_bits::AVX512_IFMA |
171             x86_CPUID_7_bits::AVX512_BW |
172             x86_CPUID_7_bits::AVX512_VL |
173             x86_CPUID_7_bits::AVX512_VBMI |
174             x86_CPUID_7_bits::AVX512_VBMI2 |
175             x86_CPUID_7_bits::AVX512_VBITALG;
176 
177          if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
178             features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
179 
180          if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
181             features_detected |= CPUID::CPUID_AVX512_AES_BIT;
182          if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
183             features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
184          }
185 
186       if(flags7 & x86_CPUID_7_bits::RDSEED)
187          features_detected |= CPUID::CPUID_RDSEED_BIT;
188       if(flags7 & x86_CPUID_7_bits::ADX)
189          features_detected |= CPUID::CPUID_ADX_BIT;
190       if(flags7 & x86_CPUID_7_bits::SHA)
191          features_detected |= CPUID::CPUID_SHA_BIT;
192       }
193 
194 #undef X86_CPUID
195 #undef X86_CPUID_SUBLEVEL
196 
197    /*
198    * If we don't have access to CPUID, we can still safely assume that
199    * any x86-64 processor has SSE2 and RDTSC
200    */
201 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
202    if(features_detected == 0)
203       {
204       features_detected |= CPUID::CPUID_SSE2_BIT;
205       features_detected |= CPUID::CPUID_RDTSC_BIT;
206       }
207 #endif
208 
209    return features_detected;
210    }
211 
212 #endif
213 
214 }
215