1*33772c1eSriastradh #include <stddef.h>
2*33772c1eSriastradh #include <stdint.h>
3*33772c1eSriastradh #ifdef HAVE_ANDROID_GETCPUFEATURES
4*33772c1eSriastradh # include <cpu-features.h>
5*33772c1eSriastradh #endif
6*33772c1eSriastradh 
7*33772c1eSriastradh #include "private/common.h"
8*33772c1eSriastradh #include "runtime.h"
9*33772c1eSriastradh 
/*
 * Result of the runtime CPU feature probe.  Every member is an int used as
 * a boolean flag (0 = absent / not usable, non-zero = available).
 */
struct CPUFeatures_ {
    int initialized; /* set to 1 by _sodium_runtime_get_cpu_features() */
    int has_neon;    /* written by the ARM probe */
    int has_sse2;    /* the remaining flags are written by the x86 probe */
    int has_sse3;
    int has_ssse3;
    int has_sse41;
    int has_avx;
    int has_avx2;
    int has_avx512f;
    int has_pclmul;
    int has_aesni;
    int has_rdrand;
};

typedef struct CPUFeatures_ CPUFeatures;

/* File-local record read by the sodium_runtime_has_*() accessors;
 * zero-initialized because it has static storage duration. */
static CPUFeatures _cpu_features;
26*33772c1eSriastradh 
/* Feature bits tested against EBX of CPUID leaf 7 */
#define CPUID_EBX_AVX2    (1 << 5)
#define CPUID_EBX_AVX512F (1 << 16)

/* Feature bits tested against ECX of CPUID leaf 1 */
#define CPUID_ECX_SSE3    (1 << 0)
#define CPUID_ECX_PCLMUL  (1 << 1)
#define CPUID_ECX_SSSE3   (1 << 9)
#define CPUID_ECX_SSE41   (1 << 19)
#define CPUID_ECX_AESNI   (1 << 25)
#define CPUID_ECX_XSAVE   (1 << 26)
#define CPUID_ECX_OSXSAVE (1 << 27)
#define CPUID_ECX_AVX     (1 << 28)
#define CPUID_ECX_RDRAND  (1 << 30)

/* Feature bit tested against EDX of CPUID leaf 1 */
#define CPUID_EDX_SSE2    (1 << 26)

/* State-component bits tested against the value read with XGETBV(0) */
#define XCR0_SSE (1 << 1)
#define XCR0_AVX (1 << 2)
44*33772c1eSriastradh 
45*33772c1eSriastradh static int
_sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)46*33772c1eSriastradh _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)
47*33772c1eSriastradh {
48*33772c1eSriastradh #ifndef __arm__
49*33772c1eSriastradh     cpu_features->has_neon = 0;
50*33772c1eSriastradh     return -1;
51*33772c1eSriastradh #else
52*33772c1eSriastradh # ifdef __APPLE__
53*33772c1eSriastradh #  ifdef __ARM_NEON__
54*33772c1eSriastradh     cpu_features->has_neon = 1;
55*33772c1eSriastradh #  else
56*33772c1eSriastradh     cpu_features->has_neon = 0;
57*33772c1eSriastradh #  endif
58*33772c1eSriastradh # elif defined(HAVE_ANDROID_GETCPUFEATURES) && \
59*33772c1eSriastradh     defined(ANDROID_CPU_ARM_FEATURE_NEON)
60*33772c1eSriastradh     cpu_features->has_neon =
61*33772c1eSriastradh         (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0;
62*33772c1eSriastradh # else
63*33772c1eSriastradh     cpu_features->has_neon = 0;
64*33772c1eSriastradh # endif
65*33772c1eSriastradh     return 0;
66*33772c1eSriastradh #endif
67*33772c1eSriastradh }
68*33772c1eSriastradh 
/*
 * Run the x86 CPUID instruction for leaf `cpu_info_type` and store the
 * resulting EAX, EBX, ECX and EDX values in cpu_info[0..3].
 *
 * - MSVC on x86/x64 uses the __cpuid() intrinsic.
 * - With HAVE_CPUID, inline assembly is used and ECX is preloaded with 0.
 * - Otherwise no instruction is executed.
 *
 * All four words are 0 when CPUID cannot be executed: either no
 * implementation was compiled in, or (32-bit x86 only) the CPU does not
 * let EFLAGS bit 0x200000 be toggled.  Callers use cpu_info[0] == 0 after
 * querying leaf 0 to detect that situation.
 */
static void
_cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
{
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
    __cpuid((int *) cpu_info, cpu_info_type);
#elif defined(HAVE_CPUID)
    cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
# ifdef __i386__
    /*
     * Probe for CPUID support: save EFLAGS, flip bit 0x200000, write it
     * back and read it again.  cpu_info[1] receives the original flags and
     * cpu_info[0] the flags read back; the original EFLAGS are restored by
     * the final popfl.
     */
    __asm__ __volatile__(
        "pushfl; pushfl; "
        "popl %0; "
        "movl %0, %1; xorl %2, %0; "
        "pushl %0; "
        "popfl; pushfl; popl %0; popfl"
        : "=&r"(cpu_info[0]), "=&r"(cpu_info[1])
        : "i"(0x200000));
    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0x0) {
        /*
         * The bit did not stick: no CPUID.  The probe left EFLAGS images
         * in words 0 and 1, so clear them again; otherwise the caller
         * would read a non-zero "maximum leaf" from cpu_info[0].
         */
        cpu_info[0] = cpu_info[1] = 0; /* LCOV_EXCL_LINE */
        return; /* LCOV_EXCL_LINE */
    }
# endif
    /*
     * On i386 and x86_64, EBX/RBX is exchanged with a scratch register
     * around the instruction instead of being declared as an output
     * (presumably because the compiler may reserve it, e.g. for PIC).
     */
# ifdef __i386__
    __asm__ __volatile__("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# elif defined(__x86_64__)
    __asm__ __volatile__("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1"
                         : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# else
    __asm__ __volatile__("cpuid"
                         : "=a"(cpu_info[0]), "=b"(cpu_info[1]),
                           "=c"(cpu_info[2]), "=d"(cpu_info[3])
                         : "0"(cpu_info_type), "2"(0U));
# endif
#else
    (void) cpu_info_type;
    cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
#endif
}
111*33772c1eSriastradh 
112*33772c1eSriastradh static int
_sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)113*33772c1eSriastradh _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)
114*33772c1eSriastradh {
115*33772c1eSriastradh     unsigned int cpu_info[4];
116*33772c1eSriastradh     unsigned int id;
117*33772c1eSriastradh 
118*33772c1eSriastradh     _cpuid(cpu_info, 0x0);
119*33772c1eSriastradh     if ((id = cpu_info[0]) == 0U) {
120*33772c1eSriastradh         return -1; /* LCOV_EXCL_LINE */
121*33772c1eSriastradh     }
122*33772c1eSriastradh     _cpuid(cpu_info, 0x00000001);
123*33772c1eSriastradh #ifdef HAVE_EMMINTRIN_H
124*33772c1eSriastradh     cpu_features->has_sse2 = ((cpu_info[3] & CPUID_EDX_SSE2) != 0x0);
125*33772c1eSriastradh #else
126*33772c1eSriastradh     cpu_features->has_sse2   = 0;
127*33772c1eSriastradh #endif
128*33772c1eSriastradh 
129*33772c1eSriastradh #ifdef HAVE_PMMINTRIN_H
130*33772c1eSriastradh     cpu_features->has_sse3 = ((cpu_info[2] & CPUID_ECX_SSE3) != 0x0);
131*33772c1eSriastradh #else
132*33772c1eSriastradh     cpu_features->has_sse3   = 0;
133*33772c1eSriastradh #endif
134*33772c1eSriastradh 
135*33772c1eSriastradh #ifdef HAVE_TMMINTRIN_H
136*33772c1eSriastradh     cpu_features->has_ssse3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0x0);
137*33772c1eSriastradh #else
138*33772c1eSriastradh     cpu_features->has_ssse3  = 0;
139*33772c1eSriastradh #endif
140*33772c1eSriastradh 
141*33772c1eSriastradh #ifdef HAVE_SMMINTRIN_H
142*33772c1eSriastradh     cpu_features->has_sse41 = ((cpu_info[2] & CPUID_ECX_SSE41) != 0x0);
143*33772c1eSriastradh #else
144*33772c1eSriastradh     cpu_features->has_sse41  = 0;
145*33772c1eSriastradh #endif
146*33772c1eSriastradh 
147*33772c1eSriastradh     cpu_features->has_avx = 0;
148*33772c1eSriastradh #ifdef HAVE_AVXINTRIN_H
149*33772c1eSriastradh     if ((cpu_info[2] & (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) ==
150*33772c1eSriastradh         (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) {
151*33772c1eSriastradh         uint32_t xcr0 = 0U;
152*33772c1eSriastradh # if defined(HAVE__XGETBV) || \
153*33772c1eSriastradh         (defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) && _MSC_FULL_VER >= 160040219)
154*33772c1eSriastradh         xcr0 = (uint32_t) _xgetbv(0);
155*33772c1eSriastradh # elif defined(_MSC_VER) && defined(_M_IX86)
156*33772c1eSriastradh         __asm {
157*33772c1eSriastradh             xor ecx, ecx
158*33772c1eSriastradh             _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
159*33772c1eSriastradh             mov xcr0, eax
160*33772c1eSriastradh         }
161*33772c1eSriastradh # elif defined(HAVE_AVX_ASM)
162*33772c1eSriastradh         __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" /* XGETBV */
163*33772c1eSriastradh                              : "=a"(xcr0)
164*33772c1eSriastradh                              : "c"((uint32_t) 0U)
165*33772c1eSriastradh                              : "%edx");
166*33772c1eSriastradh # endif
167*33772c1eSriastradh         if ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX)) {
168*33772c1eSriastradh             cpu_features->has_avx = 1;
169*33772c1eSriastradh         }
170*33772c1eSriastradh     }
171*33772c1eSriastradh #endif
172*33772c1eSriastradh 
173*33772c1eSriastradh     cpu_features->has_avx2 = 0;
174*33772c1eSriastradh #ifdef HAVE_AVX2INTRIN_H
175*33772c1eSriastradh     if (cpu_features->has_avx) {
176*33772c1eSriastradh         unsigned int cpu_info7[4];
177*33772c1eSriastradh 
178*33772c1eSriastradh         _cpuid(cpu_info7, 0x00000007);
179*33772c1eSriastradh         cpu_features->has_avx2 = ((cpu_info7[1] & CPUID_EBX_AVX2) != 0x0);
180*33772c1eSriastradh     }
181*33772c1eSriastradh #endif
182*33772c1eSriastradh 
183*33772c1eSriastradh     cpu_features->has_avx512f = 0;
184*33772c1eSriastradh #ifdef HAVE_AVX512FINTRIN_H
185*33772c1eSriastradh     if (cpu_features->has_avx2) {
186*33772c1eSriastradh         unsigned int cpu_info7[4];
187*33772c1eSriastradh 
188*33772c1eSriastradh         _cpuid(cpu_info7, 0x00000007);
189*33772c1eSriastradh         cpu_features->has_avx512f = ((cpu_info7[1] & CPUID_EBX_AVX512F) != 0x0);
190*33772c1eSriastradh     }
191*33772c1eSriastradh #endif
192*33772c1eSriastradh 
193*33772c1eSriastradh #ifdef HAVE_WMMINTRIN_H
194*33772c1eSriastradh     cpu_features->has_pclmul = ((cpu_info[2] & CPUID_ECX_PCLMUL) != 0x0);
195*33772c1eSriastradh     cpu_features->has_aesni  = ((cpu_info[2] & CPUID_ECX_AESNI) != 0x0);
196*33772c1eSriastradh #else
197*33772c1eSriastradh     cpu_features->has_pclmul = 0;
198*33772c1eSriastradh     cpu_features->has_aesni  = 0;
199*33772c1eSriastradh #endif
200*33772c1eSriastradh 
201*33772c1eSriastradh #ifdef HAVE_RDRAND
202*33772c1eSriastradh     cpu_features->has_rdrand = ((cpu_info[2] & CPUID_ECX_RDRAND) != 0x0);
203*33772c1eSriastradh #else
204*33772c1eSriastradh     cpu_features->has_rdrand = 0;
205*33772c1eSriastradh #endif
206*33772c1eSriastradh 
207*33772c1eSriastradh     return 0;
208*33772c1eSriastradh }
209*33772c1eSriastradh 
210*33772c1eSriastradh int
_sodium_runtime_get_cpu_features(void)211*33772c1eSriastradh _sodium_runtime_get_cpu_features(void)
212*33772c1eSriastradh {
213*33772c1eSriastradh     int ret = -1;
214*33772c1eSriastradh 
215*33772c1eSriastradh     ret &= _sodium_runtime_arm_cpu_features(&_cpu_features);
216*33772c1eSriastradh     ret &= _sodium_runtime_intel_cpu_features(&_cpu_features);
217*33772c1eSriastradh     _cpu_features.initialized = 1;
218*33772c1eSriastradh 
219*33772c1eSriastradh     return ret;
220*33772c1eSriastradh }
221*33772c1eSriastradh 
222*33772c1eSriastradh int
sodium_runtime_has_neon(void)223*33772c1eSriastradh sodium_runtime_has_neon(void)
224*33772c1eSriastradh {
225*33772c1eSriastradh     return _cpu_features.has_neon;
226*33772c1eSriastradh }
227*33772c1eSriastradh 
228*33772c1eSriastradh int
sodium_runtime_has_sse2(void)229*33772c1eSriastradh sodium_runtime_has_sse2(void)
230*33772c1eSriastradh {
231*33772c1eSriastradh     return _cpu_features.has_sse2;
232*33772c1eSriastradh }
233*33772c1eSriastradh 
234*33772c1eSriastradh int
sodium_runtime_has_sse3(void)235*33772c1eSriastradh sodium_runtime_has_sse3(void)
236*33772c1eSriastradh {
237*33772c1eSriastradh     return _cpu_features.has_sse3;
238*33772c1eSriastradh }
239*33772c1eSriastradh 
240*33772c1eSriastradh int
sodium_runtime_has_ssse3(void)241*33772c1eSriastradh sodium_runtime_has_ssse3(void)
242*33772c1eSriastradh {
243*33772c1eSriastradh     return _cpu_features.has_ssse3;
244*33772c1eSriastradh }
245*33772c1eSriastradh 
246*33772c1eSriastradh int
sodium_runtime_has_sse41(void)247*33772c1eSriastradh sodium_runtime_has_sse41(void)
248*33772c1eSriastradh {
249*33772c1eSriastradh     return _cpu_features.has_sse41;
250*33772c1eSriastradh }
251*33772c1eSriastradh 
252*33772c1eSriastradh int
sodium_runtime_has_avx(void)253*33772c1eSriastradh sodium_runtime_has_avx(void)
254*33772c1eSriastradh {
255*33772c1eSriastradh     return _cpu_features.has_avx;
256*33772c1eSriastradh }
257*33772c1eSriastradh 
258*33772c1eSriastradh int
sodium_runtime_has_avx2(void)259*33772c1eSriastradh sodium_runtime_has_avx2(void)
260*33772c1eSriastradh {
261*33772c1eSriastradh     return _cpu_features.has_avx2;
262*33772c1eSriastradh }
263*33772c1eSriastradh 
264*33772c1eSriastradh int
sodium_runtime_has_avx512f(void)265*33772c1eSriastradh sodium_runtime_has_avx512f(void)
266*33772c1eSriastradh {
267*33772c1eSriastradh     return _cpu_features.has_avx512f;
268*33772c1eSriastradh }
269*33772c1eSriastradh 
270*33772c1eSriastradh int
sodium_runtime_has_pclmul(void)271*33772c1eSriastradh sodium_runtime_has_pclmul(void)
272*33772c1eSriastradh {
273*33772c1eSriastradh     return _cpu_features.has_pclmul;
274*33772c1eSriastradh }
275*33772c1eSriastradh 
276*33772c1eSriastradh int
sodium_runtime_has_aesni(void)277*33772c1eSriastradh sodium_runtime_has_aesni(void)
278*33772c1eSriastradh {
279*33772c1eSriastradh     return _cpu_features.has_aesni;
280*33772c1eSriastradh }
281*33772c1eSriastradh 
282*33772c1eSriastradh int
sodium_runtime_has_rdrand(void)283*33772c1eSriastradh sodium_runtime_has_rdrand(void)
284*33772c1eSriastradh {
285*33772c1eSriastradh     return _cpu_features.has_rdrand;
286*33772c1eSriastradh }
287