1*33772c1eSriastradh #include <stddef.h>
2*33772c1eSriastradh #include <stdint.h>
3*33772c1eSriastradh #ifdef HAVE_ANDROID_GETCPUFEATURES
4*33772c1eSriastradh # include <cpu-features.h>
5*33772c1eSriastradh #endif
6*33772c1eSriastradh
7*33772c1eSriastradh #include "private/common.h"
8*33772c1eSriastradh #include "runtime.h"
9*33772c1eSriastradh
/*
 * Cache of the CPU capabilities found by the run-time probes in this file.
 * Every has_* member is a boolean-style int (0 or 1) written by the ARM or
 * x86 probe and read back through the sodium_runtime_has_*() accessors.
 */
typedef struct CPUFeatures_ {
    int initialized; /* set to 1 by _sodium_runtime_get_cpu_features() */
    int has_neon;    /* ARM NEON */
    int has_sse2;    /* x86 SSE2 */
    int has_sse3;    /* x86 SSE3 */
    int has_ssse3;   /* x86 SSSE3 */
    int has_sse41;   /* x86 SSE4.1 */
    int has_avx;     /* x86 AVX */
    int has_avx2;    /* x86 AVX2 */
    int has_avx512f; /* x86 AVX-512 Foundation */
    int has_pclmul;  /* x86 PCLMUL */
    int has_aesni;   /* x86 AES-NI */
    int has_rdrand;  /* x86 RDRAND */
} CPUFeatures;

/*
 * Single file-local instance. It has static storage duration, so every flag
 * reads as 0 until the probes have stored a value.
 */
static CPUFeatures _cpu_features;
26*33772c1eSriastradh
/* Feature bits tested in EBX of CPUID leaf 7. */
#define CPUID_EBX_AVX2    (1 << 5)
#define CPUID_EBX_AVX512F (1 << 16)

/* Feature bits tested in ECX of CPUID leaf 1. */
#define CPUID_ECX_SSE3    (1 << 0)
#define CPUID_ECX_PCLMUL  (1 << 1)
#define CPUID_ECX_SSSE3   (1 << 9)
#define CPUID_ECX_SSE41   (1 << 19)
#define CPUID_ECX_AESNI   (1 << 25)
#define CPUID_ECX_XSAVE   (1 << 26)
#define CPUID_ECX_OSXSAVE (1 << 27)
#define CPUID_ECX_AVX     (1 << 28)
#define CPUID_ECX_RDRAND  (1 << 30)

/* Feature bit tested in EDX of CPUID leaf 1. */
#define CPUID_EDX_SSE2    (1 << 26)

/* State-component bits tested in the value read with XGETBV (XCR0). */
#define XCR0_SSE          (1 << 1)
#define XCR0_AVX          (1 << 2)
44*33772c1eSriastradh
45*33772c1eSriastradh static int
_sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)46*33772c1eSriastradh _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)
47*33772c1eSriastradh {
48*33772c1eSriastradh #ifndef __arm__
49*33772c1eSriastradh cpu_features->has_neon = 0;
50*33772c1eSriastradh return -1;
51*33772c1eSriastradh #else
52*33772c1eSriastradh # ifdef __APPLE__
53*33772c1eSriastradh # ifdef __ARM_NEON__
54*33772c1eSriastradh cpu_features->has_neon = 1;
55*33772c1eSriastradh # else
56*33772c1eSriastradh cpu_features->has_neon = 0;
57*33772c1eSriastradh # endif
58*33772c1eSriastradh # elif defined(HAVE_ANDROID_GETCPUFEATURES) && \
59*33772c1eSriastradh defined(ANDROID_CPU_ARM_FEATURE_NEON)
60*33772c1eSriastradh cpu_features->has_neon =
61*33772c1eSriastradh (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0;
62*33772c1eSriastradh # else
63*33772c1eSriastradh cpu_features->has_neon = 0;
64*33772c1eSriastradh # endif
65*33772c1eSriastradh return 0;
66*33772c1eSriastradh #endif
67*33772c1eSriastradh }
68*33772c1eSriastradh
/*
 * Execute CPUID for leaf `cpu_info_type` (with ECX = 0 on the inline-asm
 * paths) and store EAX, EBX, ECX and EDX in cpu_info[0..3], in that order.
 *
 * When neither the MSVC intrinsic nor HAVE_CPUID is available, or when the
 * i386 probe below finds that the EFLAGS bit 0x200000 cannot be toggled,
 * all four words are left at 0.
 */
static void
_cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
{
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
    __cpuid((int *) cpu_info, cpu_info_type);
#elif defined(HAVE_CPUID)
    cpu_info[0] = 0;
    cpu_info[1] = 0;
    cpu_info[2] = 0;
    cpu_info[3] = 0;
# if defined(__i386__)
    /*
     * Flip bit 0x200000 of EFLAGS, read EFLAGS back, then restore the
     * saved value.  cpu_info[0] receives the re-read flags and cpu_info[1]
     * the original ones.
     */
    __asm__ __volatile__(
        "pushfl; pushfl; "
        "popl %0; "
        "movl %0, %1; xorl %2, %0; "
        "pushl %0; "
        "popfl; pushfl; popl %0; popfl"
        : "=&r"(cpu_info[0]), "=&r"(cpu_info[1])
        : "i"(0x200000));
    /* The bit did not stick: give up without issuing CPUID. */
    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0x0) {
        return; /* LCOV_EXCL_LINE */
    }
    /*
     * EBX is exchanged with a scratch register around CPUID, so EBX holds
     * its previous value again after the statement.
     */
    __asm__ __volatile__("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# elif defined(__x86_64__)
    /* Same exchange trick as on i386, applied to RBX. */
    __asm__ __volatile__("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# else
    __asm__ __volatile__("cpuid"
                         : "=a"(cpu_info[0]), "=b"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# endif
#else
    /* No way to run CPUID in this build: report all-zero registers. */
    (void) cpu_info_type;
    cpu_info[0] = 0;
    cpu_info[1] = 0;
    cpu_info[2] = 0;
    cpu_info[3] = 0;
#endif
}
111*33772c1eSriastradh
112*33772c1eSriastradh static int
_sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)113*33772c1eSriastradh _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)
114*33772c1eSriastradh {
115*33772c1eSriastradh unsigned int cpu_info[4];
116*33772c1eSriastradh unsigned int id;
117*33772c1eSriastradh
118*33772c1eSriastradh _cpuid(cpu_info, 0x0);
119*33772c1eSriastradh if ((id = cpu_info[0]) == 0U) {
120*33772c1eSriastradh return -1; /* LCOV_EXCL_LINE */
121*33772c1eSriastradh }
122*33772c1eSriastradh _cpuid(cpu_info, 0x00000001);
123*33772c1eSriastradh #ifdef HAVE_EMMINTRIN_H
124*33772c1eSriastradh cpu_features->has_sse2 = ((cpu_info[3] & CPUID_EDX_SSE2) != 0x0);
125*33772c1eSriastradh #else
126*33772c1eSriastradh cpu_features->has_sse2 = 0;
127*33772c1eSriastradh #endif
128*33772c1eSriastradh
129*33772c1eSriastradh #ifdef HAVE_PMMINTRIN_H
130*33772c1eSriastradh cpu_features->has_sse3 = ((cpu_info[2] & CPUID_ECX_SSE3) != 0x0);
131*33772c1eSriastradh #else
132*33772c1eSriastradh cpu_features->has_sse3 = 0;
133*33772c1eSriastradh #endif
134*33772c1eSriastradh
135*33772c1eSriastradh #ifdef HAVE_TMMINTRIN_H
136*33772c1eSriastradh cpu_features->has_ssse3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0x0);
137*33772c1eSriastradh #else
138*33772c1eSriastradh cpu_features->has_ssse3 = 0;
139*33772c1eSriastradh #endif
140*33772c1eSriastradh
141*33772c1eSriastradh #ifdef HAVE_SMMINTRIN_H
142*33772c1eSriastradh cpu_features->has_sse41 = ((cpu_info[2] & CPUID_ECX_SSE41) != 0x0);
143*33772c1eSriastradh #else
144*33772c1eSriastradh cpu_features->has_sse41 = 0;
145*33772c1eSriastradh #endif
146*33772c1eSriastradh
147*33772c1eSriastradh cpu_features->has_avx = 0;
148*33772c1eSriastradh #ifdef HAVE_AVXINTRIN_H
149*33772c1eSriastradh if ((cpu_info[2] & (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) ==
150*33772c1eSriastradh (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) {
151*33772c1eSriastradh uint32_t xcr0 = 0U;
152*33772c1eSriastradh # if defined(HAVE__XGETBV) || \
153*33772c1eSriastradh (defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) && _MSC_FULL_VER >= 160040219)
154*33772c1eSriastradh xcr0 = (uint32_t) _xgetbv(0);
155*33772c1eSriastradh # elif defined(_MSC_VER) && defined(_M_IX86)
156*33772c1eSriastradh __asm {
157*33772c1eSriastradh xor ecx, ecx
158*33772c1eSriastradh _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
159*33772c1eSriastradh mov xcr0, eax
160*33772c1eSriastradh }
161*33772c1eSriastradh # elif defined(HAVE_AVX_ASM)
162*33772c1eSriastradh __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" /* XGETBV */
163*33772c1eSriastradh : "=a"(xcr0)
164*33772c1eSriastradh : "c"((uint32_t) 0U)
165*33772c1eSriastradh : "%edx");
166*33772c1eSriastradh # endif
167*33772c1eSriastradh if ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX)) {
168*33772c1eSriastradh cpu_features->has_avx = 1;
169*33772c1eSriastradh }
170*33772c1eSriastradh }
171*33772c1eSriastradh #endif
172*33772c1eSriastradh
173*33772c1eSriastradh cpu_features->has_avx2 = 0;
174*33772c1eSriastradh #ifdef HAVE_AVX2INTRIN_H
175*33772c1eSriastradh if (cpu_features->has_avx) {
176*33772c1eSriastradh unsigned int cpu_info7[4];
177*33772c1eSriastradh
178*33772c1eSriastradh _cpuid(cpu_info7, 0x00000007);
179*33772c1eSriastradh cpu_features->has_avx2 = ((cpu_info7[1] & CPUID_EBX_AVX2) != 0x0);
180*33772c1eSriastradh }
181*33772c1eSriastradh #endif
182*33772c1eSriastradh
183*33772c1eSriastradh cpu_features->has_avx512f = 0;
184*33772c1eSriastradh #ifdef HAVE_AVX512FINTRIN_H
185*33772c1eSriastradh if (cpu_features->has_avx2) {
186*33772c1eSriastradh unsigned int cpu_info7[4];
187*33772c1eSriastradh
188*33772c1eSriastradh _cpuid(cpu_info7, 0x00000007);
189*33772c1eSriastradh cpu_features->has_avx512f = ((cpu_info7[1] & CPUID_EBX_AVX512F) != 0x0);
190*33772c1eSriastradh }
191*33772c1eSriastradh #endif
192*33772c1eSriastradh
193*33772c1eSriastradh #ifdef HAVE_WMMINTRIN_H
194*33772c1eSriastradh cpu_features->has_pclmul = ((cpu_info[2] & CPUID_ECX_PCLMUL) != 0x0);
195*33772c1eSriastradh cpu_features->has_aesni = ((cpu_info[2] & CPUID_ECX_AESNI) != 0x0);
196*33772c1eSriastradh #else
197*33772c1eSriastradh cpu_features->has_pclmul = 0;
198*33772c1eSriastradh cpu_features->has_aesni = 0;
199*33772c1eSriastradh #endif
200*33772c1eSriastradh
201*33772c1eSriastradh #ifdef HAVE_RDRAND
202*33772c1eSriastradh cpu_features->has_rdrand = ((cpu_info[2] & CPUID_ECX_RDRAND) != 0x0);
203*33772c1eSriastradh #else
204*33772c1eSriastradh cpu_features->has_rdrand = 0;
205*33772c1eSriastradh #endif
206*33772c1eSriastradh
207*33772c1eSriastradh return 0;
208*33772c1eSriastradh }
209*33772c1eSriastradh
210*33772c1eSriastradh int
_sodium_runtime_get_cpu_features(void)211*33772c1eSriastradh _sodium_runtime_get_cpu_features(void)
212*33772c1eSriastradh {
213*33772c1eSriastradh int ret = -1;
214*33772c1eSriastradh
215*33772c1eSriastradh ret &= _sodium_runtime_arm_cpu_features(&_cpu_features);
216*33772c1eSriastradh ret &= _sodium_runtime_intel_cpu_features(&_cpu_features);
217*33772c1eSriastradh _cpu_features.initialized = 1;
218*33772c1eSriastradh
219*33772c1eSriastradh return ret;
220*33772c1eSriastradh }
221*33772c1eSriastradh
222*33772c1eSriastradh int
sodium_runtime_has_neon(void)223*33772c1eSriastradh sodium_runtime_has_neon(void)
224*33772c1eSriastradh {
225*33772c1eSriastradh return _cpu_features.has_neon;
226*33772c1eSriastradh }
227*33772c1eSriastradh
228*33772c1eSriastradh int
sodium_runtime_has_sse2(void)229*33772c1eSriastradh sodium_runtime_has_sse2(void)
230*33772c1eSriastradh {
231*33772c1eSriastradh return _cpu_features.has_sse2;
232*33772c1eSriastradh }
233*33772c1eSriastradh
234*33772c1eSriastradh int
sodium_runtime_has_sse3(void)235*33772c1eSriastradh sodium_runtime_has_sse3(void)
236*33772c1eSriastradh {
237*33772c1eSriastradh return _cpu_features.has_sse3;
238*33772c1eSriastradh }
239*33772c1eSriastradh
240*33772c1eSriastradh int
sodium_runtime_has_ssse3(void)241*33772c1eSriastradh sodium_runtime_has_ssse3(void)
242*33772c1eSriastradh {
243*33772c1eSriastradh return _cpu_features.has_ssse3;
244*33772c1eSriastradh }
245*33772c1eSriastradh
246*33772c1eSriastradh int
sodium_runtime_has_sse41(void)247*33772c1eSriastradh sodium_runtime_has_sse41(void)
248*33772c1eSriastradh {
249*33772c1eSriastradh return _cpu_features.has_sse41;
250*33772c1eSriastradh }
251*33772c1eSriastradh
252*33772c1eSriastradh int
sodium_runtime_has_avx(void)253*33772c1eSriastradh sodium_runtime_has_avx(void)
254*33772c1eSriastradh {
255*33772c1eSriastradh return _cpu_features.has_avx;
256*33772c1eSriastradh }
257*33772c1eSriastradh
258*33772c1eSriastradh int
sodium_runtime_has_avx2(void)259*33772c1eSriastradh sodium_runtime_has_avx2(void)
260*33772c1eSriastradh {
261*33772c1eSriastradh return _cpu_features.has_avx2;
262*33772c1eSriastradh }
263*33772c1eSriastradh
264*33772c1eSriastradh int
sodium_runtime_has_avx512f(void)265*33772c1eSriastradh sodium_runtime_has_avx512f(void)
266*33772c1eSriastradh {
267*33772c1eSriastradh return _cpu_features.has_avx512f;
268*33772c1eSriastradh }
269*33772c1eSriastradh
270*33772c1eSriastradh int
sodium_runtime_has_pclmul(void)271*33772c1eSriastradh sodium_runtime_has_pclmul(void)
272*33772c1eSriastradh {
273*33772c1eSriastradh return _cpu_features.has_pclmul;
274*33772c1eSriastradh }
275*33772c1eSriastradh
276*33772c1eSriastradh int
sodium_runtime_has_aesni(void)277*33772c1eSriastradh sodium_runtime_has_aesni(void)
278*33772c1eSriastradh {
279*33772c1eSriastradh return _cpu_features.has_aesni;
280*33772c1eSriastradh }
281*33772c1eSriastradh
282*33772c1eSriastradh int
sodium_runtime_has_rdrand(void)283*33772c1eSriastradh sodium_runtime_has_rdrand(void)
284*33772c1eSriastradh {
285*33772c1eSriastradh return _cpu_features.has_rdrand;
286*33772c1eSriastradh }
287