1 /*
2 * Runtime CPU detection for x86
3 * (C) 2009,2010,2013,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7
8 #include <botan/cpuid.h>
9 #include <botan/mem_ops.h>
10 #include <botan/loadstor.h>
11
12 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15 #include <intrin.h>
16 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17 #include <ia32intrin.h>
18 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19 #include <cpuid.h>
20 #endif
21
22 #endif
23
24 namespace Botan {
25
26 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
detect_cpu_features(size_t * cache_line_size)28 uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
29 {
30 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
31 #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
32 #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
33
34 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35 #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
36 #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
37
38 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39 #define X86_CPUID(type, out) \
40 asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
41 : "0" (type))
42
43 #define X86_CPUID_SUBLEVEL(type, level, out) \
44 asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
45 : "0" (type), "2" (level))
46
47 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
48 #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
49
50 #define X86_CPUID_SUBLEVEL(type, level, out) \
51 do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
52 #else
53 #warning "No way of calling x86 cpuid instruction for this compiler"
54 #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
55 #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
56 #endif
57
58 uint64_t features_detected = 0;
59 uint32_t cpuid[4] = { 0 };
60
61 // CPUID 0: vendor identification, max sublevel
62 X86_CPUID(0, cpuid);
63
64 const uint32_t max_supported_sublevel = cpuid[0];
65
66 const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
67 const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
68 const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
69 const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
70
71 if(max_supported_sublevel >= 1)
72 {
73 // CPUID 1: feature bits
74 X86_CPUID(1, cpuid);
75 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
76
77 enum x86_CPUID_1_bits : uint64_t {
78 RDTSC = (1ULL << 4),
79 SSE2 = (1ULL << 26),
80 CLMUL = (1ULL << 33),
81 SSSE3 = (1ULL << 41),
82 SSE41 = (1ULL << 51),
83 SSE42 = (1ULL << 52),
84 AESNI = (1ULL << 57),
85 RDRAND = (1ULL << 62)
86 };
87
88 if(flags0 & x86_CPUID_1_bits::RDTSC)
89 features_detected |= CPUID::CPUID_RDTSC_BIT;
90 if(flags0 & x86_CPUID_1_bits::SSE2)
91 features_detected |= CPUID::CPUID_SSE2_BIT;
92 if(flags0 & x86_CPUID_1_bits::CLMUL)
93 features_detected |= CPUID::CPUID_CLMUL_BIT;
94 if(flags0 & x86_CPUID_1_bits::SSSE3)
95 features_detected |= CPUID::CPUID_SSSE3_BIT;
96 if(flags0 & x86_CPUID_1_bits::SSE41)
97 features_detected |= CPUID::CPUID_SSE41_BIT;
98 if(flags0 & x86_CPUID_1_bits::SSE42)
99 features_detected |= CPUID::CPUID_SSE42_BIT;
100 if(flags0 & x86_CPUID_1_bits::AESNI)
101 features_detected |= CPUID::CPUID_AESNI_BIT;
102 if(flags0 & x86_CPUID_1_bits::RDRAND)
103 features_detected |= CPUID::CPUID_RDRAND_BIT;
104 }
105
106 if(is_intel)
107 {
108 // Intel cache line size is in cpuid(1) output
109 *cache_line_size = 8 * get_byte(2, cpuid[1]);
110 }
111 else if(is_amd)
112 {
113 // AMD puts it in vendor zone
114 X86_CPUID(0x80000005, cpuid);
115 *cache_line_size = get_byte(3, cpuid[2]);
116 }
117
118 if(max_supported_sublevel >= 7)
119 {
120 clear_mem(cpuid, 4);
121 X86_CPUID_SUBLEVEL(7, 0, cpuid);
122
123 enum x86_CPUID_7_bits : uint64_t {
124 BMI1 = (1ULL << 3),
125 AVX2 = (1ULL << 5),
126 BMI2 = (1ULL << 8),
127 AVX512_F = (1ULL << 16),
128 AVX512_DQ = (1ULL << 17),
129 RDSEED = (1ULL << 18),
130 ADX = (1ULL << 19),
131 AVX512_IFMA = (1ULL << 21),
132 SHA = (1ULL << 29),
133 AVX512_BW = (1ULL << 30),
134 AVX512_VL = (1ULL << 31),
135 AVX512_VBMI = (1ULL << 33),
136 AVX512_VBMI2 = (1ULL << 38),
137 AVX512_VAES = (1ULL << 41),
138 AVX512_VCLMUL = (1ULL << 42),
139 AVX512_VBITALG = (1ULL << 44),
140 };
141
142 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
143
144 if(flags7 & x86_CPUID_7_bits::AVX2)
145 features_detected |= CPUID::CPUID_AVX2_BIT;
146 if(flags7 & x86_CPUID_7_bits::BMI1)
147 {
148 features_detected |= CPUID::CPUID_BMI1_BIT;
149 /*
150 We only set the BMI2 bit if BMI1 is also supported, so BMI2
151 code can safely use both extensions. No known processor
152 implements BMI2 but not BMI1.
153 */
154 if(flags7 & x86_CPUID_7_bits::BMI2)
155 features_detected |= CPUID::CPUID_BMI2_BIT;
156 }
157
158 if(flags7 & x86_CPUID_7_bits::AVX512_F)
159 {
160 features_detected |= CPUID::CPUID_AVX512F_BIT;
161
162 if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
163 features_detected |= CPUID::CPUID_AVX512DQ_BIT;
164 if(flags7 & x86_CPUID_7_bits::AVX512_BW)
165 features_detected |= CPUID::CPUID_AVX512BW_BIT;
166
167 const uint64_t ICELAKE_FLAGS =
168 x86_CPUID_7_bits::AVX512_F |
169 x86_CPUID_7_bits::AVX512_DQ |
170 x86_CPUID_7_bits::AVX512_IFMA |
171 x86_CPUID_7_bits::AVX512_BW |
172 x86_CPUID_7_bits::AVX512_VL |
173 x86_CPUID_7_bits::AVX512_VBMI |
174 x86_CPUID_7_bits::AVX512_VBMI2 |
175 x86_CPUID_7_bits::AVX512_VBITALG;
176
177 if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
178 features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
179
180 if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
181 features_detected |= CPUID::CPUID_AVX512_AES_BIT;
182 if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
183 features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
184 }
185
186 if(flags7 & x86_CPUID_7_bits::RDSEED)
187 features_detected |= CPUID::CPUID_RDSEED_BIT;
188 if(flags7 & x86_CPUID_7_bits::ADX)
189 features_detected |= CPUID::CPUID_ADX_BIT;
190 if(flags7 & x86_CPUID_7_bits::SHA)
191 features_detected |= CPUID::CPUID_SHA_BIT;
192 }
193
194 #undef X86_CPUID
195 #undef X86_CPUID_SUBLEVEL
196
197 /*
198 * If we don't have access to CPUID, we can still safely assume that
199 * any x86-64 processor has SSE2 and RDTSC
200 */
201 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
202 if(features_detected == 0)
203 {
204 features_detected |= CPUID::CPUID_SSE2_BIT;
205 features_detected |= CPUID::CPUID_RDTSC_BIT;
206 }
207 #endif
208
209 return features_detected;
210 }
211
212 #endif
213
214 }
215