1 // cpu.cpp - written and placed in the public domain by Wei Dai
2
3 #include "pch.h"
4 #include "config.h"
5
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9
10 #ifndef CRYPTOPP_IMPORTS
11
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20
NAMESPACE_BEGIN(CryptoPP)21 NAMESPACE_BEGIN(CryptoPP)
22
23 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// C-linkage signature of a signal handler as accepted and returned by
// signal(); used to remember the previous SIGILL disposition while probing.
extern "C" typedef void (*SigHandler)(int);
27 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
28
29 #ifdef CRYPTOPP_CPUID_AVAILABLE
30
31 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
32
CpuId(word32 input,word32 output[4])33 bool CpuId(word32 input, word32 output[4])
34 {
35 __cpuid((int *)output, input);
36 return true;
37 }
38
39 #else
40
41 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// SIGILL recovery state for the instruction probes in this file. A probe
// calls setjmp() on its buffer, runs an instruction that may be unsupported,
// and the matching handler longjmp()s back with value 1 if SIGILL is raised.
extern "C"
{
    static jmp_buf s_jmpNoCPUID;
    static jmp_buf s_jmpNoSSE2;

    // SIGILL handler used while CpuId() executes the CPUID instruction.
    static void SigIllHandlerCPUID(int /*signum*/)
    {
        longjmp(s_jmpNoCPUID, 1);
    }

    // SIGILL handler used while TrySSE2() executes an SSE2 instruction.
    static void SigIllHandlerSSE2(int /*signum*/)
    {
        longjmp(s_jmpNoSSE2, 1);
    }
}
56 #endif
57
CpuId(word32 input,word32 output[4])58 bool CpuId(word32 input, word32 output[4])
59 {
60 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
61 __try
62 {
63 __asm
64 {
65 mov eax, input
66 mov ecx, 0
67 cpuid
68 mov edi, output
69 mov [edi], eax
70 mov [edi+4], ebx
71 mov [edi+8], ecx
72 mov [edi+12], edx
73 }
74 }
75 // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
76 __except (EXCEPTION_EXECUTE_HANDLER)
77 {
78 return false;
79 }
80
81 // function 0 returns the highest basic function understood in EAX
82 if(input == 0)
83 return !!output[0];
84
85 return true;
86 #else
87 // longjmp and clobber warnings. Volatile is required.
88 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
89 volatile bool result = true;
90
91 volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
92 if (oldHandler == SIG_ERR)
93 return false;
94
95 # ifndef __MINGW32__
96 volatile sigset_t oldMask;
97 if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
98 return false;
99 # endif
100
101 if (setjmp(s_jmpNoCPUID))
102 result = false;
103 else
104 {
105 asm volatile
106 (
107 // save ebx in case -fPIC is being used
108 // TODO: this might need an early clobber on EDI.
109 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
110 "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
111 # else
112 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
113 # endif
114 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
115 : "a" (input), "c" (0)
116 : "cc"
117 );
118 }
119
120 # ifndef __MINGW32__
121 sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
122 # endif
123
124 signal(SIGILL, oldHandler);
125 return result;
126 #endif
127 }
128
129 #endif
130
TrySSE2()131 static bool TrySSE2()
132 {
133 #if CRYPTOPP_BOOL_X64
134 return true;
135 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
136 __try
137 {
138 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
139 AS2(por xmm0, xmm0) // executing SSE2 instruction
140 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
141 __m128i x = _mm_setzero_si128();
142 return _mm_cvtsi128_si32(x) == 0;
143 #endif
144 }
145 // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
146 __except (EXCEPTION_EXECUTE_HANDLER)
147 {
148 return false;
149 }
150 return true;
151 #else
152 // longjmp and clobber warnings. Volatile is required.
153 // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
154 volatile bool result = true;
155
156 volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
157 if (oldHandler == SIG_ERR)
158 return false;
159
160 # ifndef __MINGW32__
161 volatile sigset_t oldMask;
162 if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
163 return false;
164 # endif
165
166 if (setjmp(s_jmpNoSSE2))
167 result = false;
168 else
169 {
170 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
171 __asm __volatile ("por %xmm0, %xmm0");
172 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
173 __m128i x = _mm_setzero_si128();
174 result = _mm_cvtsi128_si32(x) == 0;
175 #endif
176 }
177
178 # ifndef __MINGW32__
179 sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
180 # endif
181
182 signal(SIGILL, oldHandler);
183 return result;
184 #endif
185 }
186
187 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
188 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
189 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
190 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
191 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
192 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
193 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
194
IsIntel(const word32 output[4])195 static inline bool IsIntel(const word32 output[4])
196 {
197 // This is the "GenuineIntel" string
198 return (output[1] /*EBX*/ == 0x756e6547) &&
199 (output[2] /*ECX*/ == 0x6c65746e) &&
200 (output[3] /*EDX*/ == 0x49656e69);
201 }
202
IsAMD(const word32 output[4])203 static inline bool IsAMD(const word32 output[4])
204 {
205 // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
206 return (output[1] /*EBX*/ == 0x68747541) &&
207 (output[2] /*ECX*/ == 0x444D4163) &&
208 (output[3] /*EDX*/ == 0x69746E65);
209 }
210
IsVIA(const word32 output[4])211 static inline bool IsVIA(const word32 output[4])
212 {
213 // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
214 return (output[1] /*EBX*/ == 0x746e6543) &&
215 (output[2] /*ECX*/ == 0x736c7561) &&
216 (output[3] /*EDX*/ == 0x48727561);
217 }
218
219 #if HAVE_GCC_CONSTRUCTOR1
DetectX86Features()220 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
221 #elif HAVE_GCC_CONSTRUCTOR0
222 void __attribute__ ((constructor)) DetectX86Features()
223 #else
224 void DetectX86Features()
225 #endif
226 {
227 word32 cpuid[4], cpuid1[4];
228 if (!CpuId(0, cpuid))
229 return;
230 if (!CpuId(1, cpuid1))
231 return;
232
233 g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
234 if ((cpuid1[3] & (1 << 26)) != 0)
235 g_hasSSE2 = TrySSE2();
236 g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
237 g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
238 g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
239 g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
240
241 if ((cpuid1[3] & (1 << 25)) != 0)
242 g_hasISSE = true;
243 else
244 {
245 word32 cpuid2[4];
246 CpuId(0x080000000, cpuid2);
247 if (cpuid2[0] >= 0x080000001)
248 {
249 CpuId(0x080000001, cpuid2);
250 g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
251 }
252 }
253
254 if (IsIntel(cpuid))
255 {
256 static const unsigned int RDRAND_FLAG = (1 << 30);
257 static const unsigned int RDSEED_FLAG = (1 << 18);
258 static const unsigned int SHA_FLAG = (1 << 29);
259
260 g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
261 g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
262 g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
263
264 if (cpuid[0] /*EAX*/ >= 7)
265 {
266 word32 cpuid3[4];
267 if (CpuId(7, cpuid3))
268 {
269 g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
270 g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG);
271 }
272 }
273 }
274 else if (IsAMD(cpuid))
275 {
276 static const unsigned int RDRAND_FLAG = (1 << 30);
277
278 CpuId(0x01, cpuid);
279 g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
280
281 CpuId(0x80000005, cpuid);
282 g_cacheLineSize = GETBYTE(cpuid[2], 0);
283 }
284 else if (IsVIA(cpuid))
285 {
286 static const unsigned int RNG_FLAGS = (0x3 << 2);
287 static const unsigned int ACE_FLAGS = (0x3 << 6);
288 static const unsigned int ACE2_FLAGS = (0x3 << 8);
289 static const unsigned int PHE_FLAGS = (0x3 << 10);
290 static const unsigned int PMM_FLAGS = (0x3 << 12);
291
292 CpuId(0xC0000000, cpuid);
293 if (cpuid[0] >= 0xC0000001)
294 {
295 // Extended features available
296 CpuId(0xC0000001, cpuid);
297 g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
298 g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
299 g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
300 g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
301 g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
302 }
303 }
304
305 if (!g_cacheLineSize)
306 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
307
308 *((volatile bool*)&g_x86DetectionDone) = true;
309 }
310
311 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
312
// The ARM equivalent of CPUID probing is reading an MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
314 // Attempting to run the code results in a SIGILL and termination.
315 //
316 // #if defined(__arm64__) || defined(__aarch64__)
317 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
318 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
319 // #elif defined(__arm__) || defined(__aarch32__)
320 // word32 caps = 0; // Read ID_ISAR5_EL1
321 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
322 // #endif
323 //
// The following does not work well either. It appears to be missing constants, and it does not detect AArch32 execution environments on AArch64.
325 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
326 //
327 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
328 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
329 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
330 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
331
332 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// SIGILL recovery state for the ARM instruction probes. Each Try*() probe
// calls setjmp() on its own buffer before running instructions that may be
// unsupported; the matching handler longjmp()s back with value 1.
extern "C"
{
    static jmp_buf s_jmpNoNEON;
    static jmp_buf s_jmpNoPMULL;
    static jmp_buf s_jmpNoCRC32;
    static jmp_buf s_jmpNoAES;
    static jmp_buf s_jmpNoSHA1;
    static jmp_buf s_jmpNoSHA2;

    // SIGILL handler installed by TryNEON().
    static void SigIllHandlerNEON(int /*signum*/)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    // SIGILL handler installed by TryPMULL().
    static void SigIllHandlerPMULL(int /*signum*/)
    {
        longjmp(s_jmpNoPMULL, 1);
    }

    // SIGILL handler installed by TryCRC32().
    static void SigIllHandlerCRC32(int /*signum*/)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    // SIGILL handler installed by TryAES().
    static void SigIllHandlerAES(int /*signum*/)
    {
        longjmp(s_jmpNoAES, 1);
    }

    // SIGILL handler installed by TrySHA1().
    static void SigIllHandlerSHA1(int /*signum*/)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    // SIGILL handler installed by TrySHA2().
    static void SigIllHandlerSHA2(int /*signum*/)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
}
371 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
372
// Checks whether NEON instructions can actually be executed.
//
// Returns false when NEON intrinsics are not compiled in, when the probe
// faults (structured exception or SIGILL), or when the SIGILL guard cannot
// be set up. Otherwise the result is derived from the lanes the probe
// computed.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoNEON))
        result = false;   // reached via longjmp from SigIllHandlerNEON
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true.
        // Deriving the result from the computed lanes keeps the probe alive.
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
440
// Checks whether the polynomial multiply instructions (vmull_p64 and
// vmull_high_p64) can be executed and give the expected products.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), when the SIGILL guard
// cannot be set up, or when any product differs from the expected value.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // expected {0x06,0}
        const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // expected {0x1b,0}
        const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // expected {0x6c006c006c006c00,0x6c006c006c006c00}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoPMULL))
        result = false;   // reached via longjmp from SigIllHandlerPMULL
    else
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1);  // expected {0x06,0}
        const uint64x2_t& t2 = (uint64x2_t)(r2);  // expected {0x1b,0}
        const uint64x2_t& t3 = (uint64x2_t)(r3);  // expected {0x6c006c006c006c00,0x6c006c006c006c00}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
511
// Checks whether the CRC32C instructions (__crc32cw, __crc32ch, __crc32cb)
// can be executed.
//
// Returns false when the ARM CRC32 intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), or when the SIGILL guard
// cannot be set up. Otherwise the result is whether the chained checksum is
// non-zero.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoCRC32))
        result = false;   // reached via longjmp from SigIllHandlerCRC32
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true.
        // Deriving the result from the checksum keeps the probe alive.
        result = !!w;
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
565
// Checks whether the AES instructions (vaeseq_u8 and vaesdq_u8) can be
// executed.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), or when the SIGILL guard
// cannot be set up. Otherwise the result is derived from the output lanes of
// one encrypt and one decrypt step.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoAES))
        result = false;   // reached via longjmp from SigIllHandlerAES
    else
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        // Hack... GCC optimizes away the code and returns true.
        // Deriving the result from the output lanes keeps the probe alive.
        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
618
// Checks whether the SHA-1 instructions (vsha1cq, vsha1mq, vsha1pq,
// vsha1su0q, vsha1su1q) can be executed.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), or when the SIGILL guard
// cannot be set up. Otherwise the result is derived from the output lanes of
// the five operations.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA1))
        result = false;   // reached via longjmp from SigIllHandlerSHA1
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        // Hack... GCC optimizes away the code and returns true.
        // Deriving the result from the output lanes keeps the probe alive.
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
678
// Checks whether the SHA-256 instructions (vsha256hq, vsha256h2q,
// vsha256su0q, vsha256su1q) can be executed.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), or when the SIGILL guard
// cannot be set up. Otherwise the result is derived from the output lanes of
// the four operations.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the current signal mask so it can be restored after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed on this failure path.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA2))
        result = false;   // reached via longjmp from SigIllHandlerSHA2
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        // Hack... GCC optimizes away the code and returns true.
        // Deriving the result from the output lanes keeps the probe alive.
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
736
737 #if HAVE_GCC_CONSTRUCTOR1
DetectArmFeatures()738 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
739 #elif HAVE_GCC_CONSTRUCTOR0
740 void __attribute__ ((constructor)) DetectArmFeatures()
741 #else
742 void DetectArmFeatures()
743 #endif
744 {
745 g_hasNEON = TryNEON();
746 g_hasPMULL = TryPMULL();
747 g_hasCRC32 = TryCRC32();
748 g_hasAES = TryAES();
749 g_hasSHA1 = TrySHA1();
750 g_hasSHA2 = TrySHA2();
751
752 *((volatile bool*)&g_ArmDetectionDone) = true;
753 }
754
755 #endif
756
757 NAMESPACE_END
758
759 #endif
760