1 #include <stddef.h>
2 #include <stdint.h>
3 #ifdef HAVE_ANDROID_GETCPUFEATURES
4 # include <cpu-features.h>
5 #endif
6
7 #include "private/common.h"
8 #include "runtime.h"
9
/*
 * Cached result of the runtime CPU probes.
 *
 * Every member is an int used as a boolean flag (0 = absent, non-zero =
 * present). `initialized` is set to 1 once _sodium_runtime_get_cpu_features()
 * has run.
 */
typedef struct CPUFeatures_ {
    int initialized; /* probes have been executed at least once */
    int has_neon;    /* ARM NEON */
    int has_sse2;    /* x86 SSE2 */
    int has_sse3;    /* x86 SSE3 */
    int has_ssse3;   /* x86 SSSE3 */
    int has_sse41;   /* x86 SSE4.1 */
    int has_avx;     /* x86 AVX */
    int has_avx2;    /* x86 AVX2 */
    int has_avx512f; /* x86 AVX-512 Foundation */
    int has_pclmul;  /* x86 PCLMUL */
    int has_aesni;   /* x86 AES-NI */
    int has_rdrand;  /* x86 RDRAND */
} CPUFeatures;

/*
 * File-local feature cache. As an object with static storage duration it
 * starts out with every flag set to zero.
 */
static CPUFeatures _cpu_features;
26
/* Masks tested against EBX of CPUID leaf 0x7. */
#define CPUID_EBX_AVX2    0x00000020
#define CPUID_EBX_AVX512F 0x00010000

/* Masks tested against ECX of CPUID leaf 0x1. */
#define CPUID_ECX_SSE3    0x00000001
#define CPUID_ECX_PCLMUL  0x00000002
#define CPUID_ECX_SSSE3   0x00000200
#define CPUID_ECX_SSE41   0x00080000
#define CPUID_ECX_AESNI   0x02000000
#define CPUID_ECX_XSAVE   0x04000000
#define CPUID_ECX_OSXSAVE 0x08000000
#define CPUID_ECX_AVX     0x10000000
#define CPUID_ECX_RDRAND  0x40000000

/* Mask tested against EDX of CPUID leaf 0x1. */
#define CPUID_EDX_SSE2    0x04000000

/* Masks tested against the value read with XGETBV (XCR0). */
#define XCR0_SSE          0x00000002
#define XCR0_AVX          0x00000004
44
45 static int
_sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)46 _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)
47 {
48 #ifndef __arm__
49 cpu_features->has_neon = 0;
50 return -1;
51 #else
52 # ifdef __APPLE__
53 # ifdef __ARM_NEON__
54 cpu_features->has_neon = 1;
55 # else
56 cpu_features->has_neon = 0;
57 # endif
58 # elif defined(HAVE_ANDROID_GETCPUFEATURES) && \
59 defined(ANDROID_CPU_ARM_FEATURE_NEON)
60 cpu_features->has_neon =
61 (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0;
62 # else
63 cpu_features->has_neon = 0;
64 # endif
65 return 0;
66 #endif
67 }
68
/*
 * Execute CPUID for leaf `cpu_info_type` (sub-leaf 0) and store
 * EAX, EBX, ECX, EDX into cpu_info[0..3] respectively.
 *
 * - MSVC on x86/x64: delegates to the __cpuid intrinsic.
 * - HAVE_CPUID builds: uses inline assembly. On i386 the presence of the
 *   CPUID instruction is tested first; if it is missing the function returns
 *   early without issuing CPUID.
 * - Otherwise: cpu_info is zero-filled.
 */
static void
_cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
{
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
    __cpuid((int *) cpu_info, cpu_info_type);
#elif defined(HAVE_CPUID)
    unsigned int reg;

    for (reg = 0U; reg < 4U; reg++) {
        cpu_info[reg] = 0;
    }
# if defined(__i386__)
    /*
     * Try to flip bit 21 (0x200000) of EFLAGS: cpu_info[1] receives the
     * original flags, cpu_info[0] the flags read back after the flip
     * attempt. The original flags are restored by the final popfl.
     */
    __asm__ __volatile__(
        "pushfl; pushfl; "
        "popl %0; "
        "movl %0, %1; xorl %2, %0; "
        "pushl %0; "
        "popfl; pushfl; popl %0; popfl"
        : "=&r"(cpu_info[0]), "=&r"(cpu_info[1])
        : "i"(0x200000));
    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0x0) {
        /* The bit could not be toggled: no CPUID instruction. */
        return; /* LCOV_EXCL_LINE */
    }
    /* EBX is exchanged with a scratch register around CPUID. */
    __asm__ __volatile__("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# elif defined(__x86_64__)
    /* RBX is exchanged with a scratch register around CPUID. */
    __asm__ __volatile__("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# else
    __asm__ __volatile__("cpuid"
                         : "=a"(cpu_info[0]), "=b"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# endif
#else
    unsigned int reg;

    (void) cpu_info_type;
    for (reg = 0U; reg < 4U; reg++) {
        cpu_info[reg] = 0;
    }
#endif
}
111
112 static int
_sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)113 _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)
114 {
115 unsigned int cpu_info[4];
116 unsigned int id;
117
118 _cpuid(cpu_info, 0x0);
119 if ((id = cpu_info[0]) == 0U) {
120 return -1; /* LCOV_EXCL_LINE */
121 }
122 _cpuid(cpu_info, 0x00000001);
123 #ifdef HAVE_EMMINTRIN_H
124 cpu_features->has_sse2 = ((cpu_info[3] & CPUID_EDX_SSE2) != 0x0);
125 #else
126 cpu_features->has_sse2 = 0;
127 #endif
128
129 #ifdef HAVE_PMMINTRIN_H
130 cpu_features->has_sse3 = ((cpu_info[2] & CPUID_ECX_SSE3) != 0x0);
131 #else
132 cpu_features->has_sse3 = 0;
133 #endif
134
135 #ifdef HAVE_TMMINTRIN_H
136 cpu_features->has_ssse3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0x0);
137 #else
138 cpu_features->has_ssse3 = 0;
139 #endif
140
141 #ifdef HAVE_SMMINTRIN_H
142 cpu_features->has_sse41 = ((cpu_info[2] & CPUID_ECX_SSE41) != 0x0);
143 #else
144 cpu_features->has_sse41 = 0;
145 #endif
146
147 cpu_features->has_avx = 0;
148 #ifdef HAVE_AVXINTRIN_H
149 if ((cpu_info[2] & (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) ==
150 (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) {
151 uint32_t xcr0 = 0U;
152 # if defined(HAVE__XGETBV) || \
153 (defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) && _MSC_FULL_VER >= 160040219)
154 xcr0 = (uint32_t) _xgetbv(0);
155 # elif defined(_MSC_VER) && defined(_M_IX86)
156 __asm {
157 xor ecx, ecx
158 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
159 mov xcr0, eax
160 }
161 # elif defined(HAVE_AVX_ASM)
162 __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" /* XGETBV */
163 : "=a"(xcr0)
164 : "c"((uint32_t) 0U)
165 : "%edx");
166 # endif
167 if ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX)) {
168 cpu_features->has_avx = 1;
169 }
170 }
171 #endif
172
173 cpu_features->has_avx2 = 0;
174 #ifdef HAVE_AVX2INTRIN_H
175 if (cpu_features->has_avx) {
176 unsigned int cpu_info7[4];
177
178 _cpuid(cpu_info7, 0x00000007);
179 cpu_features->has_avx2 = ((cpu_info7[1] & CPUID_EBX_AVX2) != 0x0);
180 }
181 #endif
182
183 cpu_features->has_avx512f = 0;
184 #ifdef HAVE_AVX512FINTRIN_H
185 if (cpu_features->has_avx2) {
186 unsigned int cpu_info7[4];
187
188 _cpuid(cpu_info7, 0x00000007);
189 cpu_features->has_avx512f = ((cpu_info7[1] & CPUID_EBX_AVX512F) != 0x0);
190 }
191 #endif
192
193 #ifdef HAVE_WMMINTRIN_H
194 cpu_features->has_pclmul = ((cpu_info[2] & CPUID_ECX_PCLMUL) != 0x0);
195 cpu_features->has_aesni = ((cpu_info[2] & CPUID_ECX_AESNI) != 0x0);
196 #else
197 cpu_features->has_pclmul = 0;
198 cpu_features->has_aesni = 0;
199 #endif
200
201 #ifdef HAVE_RDRAND
202 cpu_features->has_rdrand = ((cpu_info[2] & CPUID_ECX_RDRAND) != 0x0);
203 #else
204 cpu_features->has_rdrand = 0;
205 #endif
206
207 return 0;
208 }
209
210 int
_sodium_runtime_get_cpu_features(void)211 _sodium_runtime_get_cpu_features(void)
212 {
213 int ret = -1;
214
215 ret &= _sodium_runtime_arm_cpu_features(&_cpu_features);
216 ret &= _sodium_runtime_intel_cpu_features(&_cpu_features);
217 _cpu_features.initialized = 1;
218
219 return ret;
220 }
221
222 int
sodium_runtime_has_neon(void)223 sodium_runtime_has_neon(void)
224 {
225 return _cpu_features.has_neon;
226 }
227
228 int
sodium_runtime_has_sse2(void)229 sodium_runtime_has_sse2(void)
230 {
231 return _cpu_features.has_sse2;
232 }
233
234 int
sodium_runtime_has_sse3(void)235 sodium_runtime_has_sse3(void)
236 {
237 return _cpu_features.has_sse3;
238 }
239
240 int
sodium_runtime_has_ssse3(void)241 sodium_runtime_has_ssse3(void)
242 {
243 return _cpu_features.has_ssse3;
244 }
245
246 int
sodium_runtime_has_sse41(void)247 sodium_runtime_has_sse41(void)
248 {
249 return _cpu_features.has_sse41;
250 }
251
252 int
sodium_runtime_has_avx(void)253 sodium_runtime_has_avx(void)
254 {
255 return _cpu_features.has_avx;
256 }
257
258 int
sodium_runtime_has_avx2(void)259 sodium_runtime_has_avx2(void)
260 {
261 return _cpu_features.has_avx2;
262 }
263
264 int
sodium_runtime_has_avx512f(void)265 sodium_runtime_has_avx512f(void)
266 {
267 return _cpu_features.has_avx512f;
268 }
269
270 int
sodium_runtime_has_pclmul(void)271 sodium_runtime_has_pclmul(void)
272 {
273 return _cpu_features.has_pclmul;
274 }
275
276 int
sodium_runtime_has_aesni(void)277 sodium_runtime_has_aesni(void)
278 {
279 return _cpu_features.has_aesni;
280 }
281
282 int
sodium_runtime_has_rdrand(void)283 sodium_runtime_has_rdrand(void)
284 {
285 return _cpu_features.has_rdrand;
286 }
287