1 #include <stddef.h>
2 #include <stdint.h>
3 #ifdef HAVE_ANDROID_GETCPUFEATURES
4 # include <cpu-features.h>
5 #endif
6
7 #include "private/common.h"
8 #include "runtime.h"
9
/*
 * Cached results of the runtime CPU capability probes.
 *
 * Every has_* member is a boolean stored as an int (0 or 1).
 */
typedef struct CPUFeatures_ {
    int initialized; /* set to 1 by _sodium_runtime_get_cpu_features() */
    int has_neon;    /* ARM NEON */
    int has_sse2;    /* x86 SSE2 */
    int has_sse3;    /* x86 SSE3 */
    int has_ssse3;   /* x86 SSSE3 */
    int has_sse41;   /* x86 SSE4.1 */
    int has_avx;     /* x86 AVX, with the matching state enabled in XCR0 */
    int has_avx2;    /* x86 AVX2 */
    int has_avx512f; /* x86 AVX-512 Foundation */
    int has_pclmul;  /* x86 PCLMUL */
    int has_aesni;   /* x86 AES-NI */
    int has_rdrand;  /* x86 RDRAND */
} CPUFeatures;
24
25 static CPUFeatures _cpu_features;
26
/* Bits tested in EBX of the CPUID leaf 7 result */
#define CPUID_EBX_AVX2    0x00000020 /* bit 5 */
#define CPUID_EBX_AVX512F 0x00010000 /* bit 16 */

/* Bits tested in ECX of the CPUID leaf 1 result */
#define CPUID_ECX_SSE3    0x00000001 /* bit 0 */
#define CPUID_ECX_PCLMUL  0x00000002 /* bit 1 */
#define CPUID_ECX_SSSE3   0x00000200 /* bit 9 */
#define CPUID_ECX_SSE41   0x00080000 /* bit 19 */
#define CPUID_ECX_AESNI   0x02000000 /* bit 25 */
#define CPUID_ECX_XSAVE   0x04000000 /* bit 26 */
#define CPUID_ECX_OSXSAVE 0x08000000 /* bit 27 */
#define CPUID_ECX_AVX     0x10000000 /* bit 28 */
#define CPUID_ECX_RDRAND  0x40000000 /* bit 30 */

/* Bit tested in EDX of the CPUID leaf 1 result */
#define CPUID_EDX_SSE2    0x04000000 /* bit 26 */

/* Bits tested in the low 32 bits of XCR0, as returned by XGETBV */
#define XCR0_SSE       0x00000002 /* bit 1 */
#define XCR0_AVX       0x00000004 /* bit 2 */
#define XCR0_OPMASK    0x00000020 /* bit 5 */
#define XCR0_ZMM_HI256 0x00000040 /* bit 6 */
#define XCR0_HI16_ZMM  0x00000080 /* bit 7 */
47
48 static int
_sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)49 _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)
50 {
51 #ifndef __arm__
52 cpu_features->has_neon = 0;
53 return -1;
54 #else
55 # ifdef __APPLE__
56 # ifdef __ARM_NEON__
57 cpu_features->has_neon = 1;
58 # else
59 cpu_features->has_neon = 0;
60 # endif
61 # elif defined(HAVE_ANDROID_GETCPUFEATURES) && \
62 defined(ANDROID_CPU_ARM_FEATURE_NEON)
63 cpu_features->has_neon =
64 (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0;
65 # else
66 cpu_features->has_neon = 0;
67 # endif
68 return 0;
69 #endif
70 }
71
/*
 * Run CPUID for leaf `cpu_info_type` and store EAX, EBX, ECX and EDX into
 * cpu_info[0], cpu_info[1], cpu_info[2] and cpu_info[3] respectively.
 * The GCC-style assembly paths load 0 into ECX (sub-leaf 0) beforehand.
 *
 * All four entries are set to 0 when no CPUID implementation is compiled in
 * (neither a supported MSVC target nor HAVE_CPUID), or when a 32-bit x86 CPU
 * is found not to support the instruction. Callers can therefore treat
 * cpu_info[0] == 0 for leaf 0 as "CPUID unavailable".
 */
static void
_cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
{
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
    __cpuid((int *) cpu_info, cpu_info_type);
#elif defined(HAVE_CPUID)
    cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
# ifdef __i386__
    /*
     * Probe for CPUID support: save EFLAGS, flip bit 0x200000 in a copy,
     * write the copy back, read EFLAGS again, then restore the saved value.
     * cpu_info[0] ends up with the re-read flags and cpu_info[1] with the
     * original ones; if the bit did not change, CPUID is not available.
     */
    __asm__ __volatile__(
        "pushfl; pushfl; "
        "popl %0; "
        "movl %0, %1; xorl %2, %0; "
        "pushl %0; "
        "popfl; pushfl; popl %0; popfl"
        : "=&r"(cpu_info[0]), "=&r"(cpu_info[1])
        : "i"(0x200000));
    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0x0) {
        /*
         * The two entries still hold the EFLAGS copies used by the probe.
         * Clear them so that the caller sees an all-zero result instead of
         * mistaking the flags for CPUID output.
         */
        cpu_info[0] = cpu_info[1] = 0; /* LCOV_EXCL_LINE */
        return; /* LCOV_EXCL_LINE */
    }
# endif
    /*
     * In the first two variants EBX/RBX is exchanged with a scratch register
     * around the instruction instead of being declared as an output
     * (presumably because the compiler may reserve it, e.g. for PIC).
     */
# ifdef __i386__
    __asm__ __volatile__("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# elif defined(__x86_64__)
    __asm__ __volatile__("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# else
    __asm__ __volatile__("cpuid"
                         : "=a"(cpu_info[0]), "=b"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# endif
#else
    /* no CPUID implementation available: report nothing */
    (void) cpu_info_type;
    cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
#endif
}
114
115 static int
_sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)116 _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)
117 {
118 unsigned int cpu_info[4];
119 unsigned int id;
120 uint32_t xcr0 = 0U;
121
122 _cpuid(cpu_info, 0x0);
123 if ((id = cpu_info[0]) == 0U) {
124 return -1; /* LCOV_EXCL_LINE */
125 }
126 _cpuid(cpu_info, 0x00000001);
127 #ifdef HAVE_EMMINTRIN_H
128 cpu_features->has_sse2 = ((cpu_info[3] & CPUID_EDX_SSE2) != 0x0);
129 #else
130 cpu_features->has_sse2 = 0;
131 #endif
132
133 #ifdef HAVE_PMMINTRIN_H
134 cpu_features->has_sse3 = ((cpu_info[2] & CPUID_ECX_SSE3) != 0x0);
135 #else
136 cpu_features->has_sse3 = 0;
137 #endif
138
139 #ifdef HAVE_TMMINTRIN_H
140 cpu_features->has_ssse3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0x0);
141 #else
142 cpu_features->has_ssse3 = 0;
143 #endif
144
145 #ifdef HAVE_SMMINTRIN_H
146 cpu_features->has_sse41 = ((cpu_info[2] & CPUID_ECX_SSE41) != 0x0);
147 #else
148 cpu_features->has_sse41 = 0;
149 #endif
150
151 cpu_features->has_avx = 0;
152
153 (void) xcr0;
154 #ifdef HAVE_AVXINTRIN_H
155 if ((cpu_info[2] & (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) ==
156 (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) {
157 xcr0 = 0U;
158 # if defined(HAVE__XGETBV) || \
159 (defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) && _MSC_FULL_VER >= 160040219)
160 xcr0 = (uint32_t) _xgetbv(0);
161 # elif defined(_MSC_VER) && defined(_M_IX86)
162 /*
163 * Visual Studio documentation states that eax/ecx/edx don't need to
164 * be preserved in inline assembly code. But that doesn't seem to
165 * always hold true on Visual Studio 2010.
166 */
167 __asm {
168 push eax
169 push ecx
170 push edx
171 xor ecx, ecx
172 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
173 mov xcr0, eax
174 pop edx
175 pop ecx
176 pop eax
177 }
178 # elif defined(HAVE_AVX_ASM)
179 __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" /* XGETBV */
180 : "=a"(xcr0)
181 : "c"((uint32_t) 0U)
182 : "%edx");
183 # endif
184 if ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX)) {
185 cpu_features->has_avx = 1;
186 }
187 }
188 #endif
189
190 cpu_features->has_avx2 = 0;
191 #ifdef HAVE_AVX2INTRIN_H
192 if (cpu_features->has_avx) {
193 unsigned int cpu_info7[4];
194
195 _cpuid(cpu_info7, 0x00000007);
196 cpu_features->has_avx2 = ((cpu_info7[1] & CPUID_EBX_AVX2) != 0x0);
197 }
198 #endif
199
200 cpu_features->has_avx512f = 0;
201 #ifdef HAVE_AVX512FINTRIN_H
202 if (cpu_features->has_avx2) {
203 unsigned int cpu_info7[4];
204
205 _cpuid(cpu_info7, 0x00000007);
206 /* LCOV_EXCL_START */
207 if ((cpu_info7[1] & CPUID_EBX_AVX512F) == CPUID_EBX_AVX512F &&
208 (xcr0 & (XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM))
209 == (XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM)) {
210 cpu_features->has_avx512f = 1;
211 }
212 /* LCOV_EXCL_STOP */
213 }
214 #endif
215
216 #ifdef HAVE_WMMINTRIN_H
217 cpu_features->has_pclmul = ((cpu_info[2] & CPUID_ECX_PCLMUL) != 0x0);
218 cpu_features->has_aesni = ((cpu_info[2] & CPUID_ECX_AESNI) != 0x0);
219 #else
220 cpu_features->has_pclmul = 0;
221 cpu_features->has_aesni = 0;
222 #endif
223
224 #ifdef HAVE_RDRAND
225 cpu_features->has_rdrand = ((cpu_info[2] & CPUID_ECX_RDRAND) != 0x0);
226 #else
227 cpu_features->has_rdrand = 0;
228 #endif
229
230 return 0;
231 }
232
233 int
_sodium_runtime_get_cpu_features(void)234 _sodium_runtime_get_cpu_features(void)
235 {
236 int ret = -1;
237
238 ret &= _sodium_runtime_arm_cpu_features(&_cpu_features);
239 ret &= _sodium_runtime_intel_cpu_features(&_cpu_features);
240 _cpu_features.initialized = 1;
241
242 return ret;
243 }
244
245 int
sodium_runtime_has_neon(void)246 sodium_runtime_has_neon(void)
247 {
248 return _cpu_features.has_neon;
249 }
250
251 int
sodium_runtime_has_sse2(void)252 sodium_runtime_has_sse2(void)
253 {
254 return _cpu_features.has_sse2;
255 }
256
257 int
sodium_runtime_has_sse3(void)258 sodium_runtime_has_sse3(void)
259 {
260 return _cpu_features.has_sse3;
261 }
262
263 int
sodium_runtime_has_ssse3(void)264 sodium_runtime_has_ssse3(void)
265 {
266 return _cpu_features.has_ssse3;
267 }
268
269 int
sodium_runtime_has_sse41(void)270 sodium_runtime_has_sse41(void)
271 {
272 return _cpu_features.has_sse41;
273 }
274
275 int
sodium_runtime_has_avx(void)276 sodium_runtime_has_avx(void)
277 {
278 return _cpu_features.has_avx;
279 }
280
281 int
sodium_runtime_has_avx2(void)282 sodium_runtime_has_avx2(void)
283 {
284 return _cpu_features.has_avx2;
285 }
286
287 int
sodium_runtime_has_avx512f(void)288 sodium_runtime_has_avx512f(void)
289 {
290 return _cpu_features.has_avx512f;
291 }
292
293 int
sodium_runtime_has_pclmul(void)294 sodium_runtime_has_pclmul(void)
295 {
296 return _cpu_features.has_pclmul;
297 }
298
299 int
sodium_runtime_has_aesni(void)300 sodium_runtime_has_aesni(void)
301 {
302 return _cpu_features.has_aesni;
303 }
304
305 int
sodium_runtime_has_rdrand(void)306 sodium_runtime_has_rdrand(void)
307 {
308 return _cpu_features.has_rdrand;
309 }
310