1 // cpu.cpp - written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9 
10 #ifndef CRYPTOPP_IMPORTS
11 
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15 
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20 
NAMESPACE_BEGIN(CryptoPP)21 NAMESPACE_BEGIN(CryptoPP)
22 
23 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// Function-pointer type for a C signal handler, used to hold the value
// returned by signal() so the previous handler can be reinstated.
extern "C"
{
	typedef void (*SigHandler)(int);
}
27 #endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
28 
29 #ifdef CRYPTOPP_CPUID_AVAILABLE
30 
31 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
32 
CpuId(word32 input,word32 output[4])33 bool CpuId(word32 input, word32 output[4])
34 {
35 	__cpuid((int *)output, input);
36 	return true;
37 }
38 
39 #else
40 
41 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
	// Jump buffers armed with setjmp() by the probing functions below.
	static jmp_buf s_jmpNoCPUID;
	static jmp_buf s_jmpNoSSE2;

	// SIGILL handler installed while probing CPUID: unwinds back to
	// the setjmp() site that armed s_jmpNoCPUID.
	static void SigIllHandlerCPUID(int /*signum*/)
	{
		longjmp(s_jmpNoCPUID, 1);
	}

	// SIGILL handler installed while probing SSE2: unwinds back to
	// the setjmp() site that armed s_jmpNoSSE2.
	static void SigIllHandlerSSE2(int /*signum*/)
	{
		longjmp(s_jmpNoSSE2, 1);
	}
}
56 #endif
57 
CpuId(word32 input,word32 output[4])58 bool CpuId(word32 input, word32 output[4])
59 {
60 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
61     __try
62 	{
63 		__asm
64 		{
65 			mov eax, input
66 			mov ecx, 0
67 			cpuid
68 			mov edi, output
69 			mov [edi], eax
70 			mov [edi+4], ebx
71 			mov [edi+8], ecx
72 			mov [edi+12], edx
73 		}
74 	}
75 	// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
76 	__except (EXCEPTION_EXECUTE_HANDLER)
77 	{
78 		return false;
79 	}
80 
81 	// function 0 returns the highest basic function understood in EAX
82 	if(input == 0)
83 		return !!output[0];
84 
85 	return true;
86 #else
87 	// longjmp and clobber warnings. Volatile is required.
88 	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
89 	volatile bool result = true;
90 
91 	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
92 	if (oldHandler == SIG_ERR)
93 		return false;
94 
95 # ifndef __MINGW32__
96 	volatile sigset_t oldMask;
97 	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
98 		return false;
99 # endif
100 
101 	if (setjmp(s_jmpNoCPUID))
102 		result = false;
103 	else
104 	{
105 		asm volatile
106 		(
107 			// save ebx in case -fPIC is being used
108 			// TODO: this might need an early clobber on EDI.
109 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
110 			"pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
111 # else
112 			"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
113 # endif
114 			: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
115 			: "a" (input), "c" (0)
116 			: "cc"
117 		);
118 	}
119 
120 # ifndef __MINGW32__
121 	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
122 # endif
123 
124 	signal(SIGILL, oldHandler);
125 	return result;
126 #endif
127 }
128 
129 #endif
130 
TrySSE2()131 static bool TrySSE2()
132 {
133 #if CRYPTOPP_BOOL_X64
134 	return true;
135 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
136     __try
137 	{
138 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
139 		AS2(por xmm0, xmm0)        // executing SSE2 instruction
140 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
141 		__m128i x = _mm_setzero_si128();
142 		return _mm_cvtsi128_si32(x) == 0;
143 #endif
144 	}
145 	// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
146 	__except (EXCEPTION_EXECUTE_HANDLER)
147 	{
148 		return false;
149 	}
150 	return true;
151 #else
152 	// longjmp and clobber warnings. Volatile is required.
153 	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
154 	volatile bool result = true;
155 
156 	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
157 	if (oldHandler == SIG_ERR)
158 		return false;
159 
160 # ifndef __MINGW32__
161 	volatile sigset_t oldMask;
162 	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
163 		return false;
164 # endif
165 
166 	if (setjmp(s_jmpNoSSE2))
167 		result = false;
168 	else
169 	{
170 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
171 		__asm __volatile ("por %xmm0, %xmm0");
172 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
173 		__m128i x = _mm_setzero_si128();
174 		result = _mm_cvtsi128_si32(x) == 0;
175 #endif
176 	}
177 
178 # ifndef __MINGW32__
179 	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
180 # endif
181 
182 	signal(SIGILL, oldHandler);
183 	return result;
184 #endif
185 }
186 
187 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
188 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
189 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
190 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
191 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
192 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
193 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
194 
IsIntel(const word32 output[4])195 static inline bool IsIntel(const word32 output[4])
196 {
197 	// This is the "GenuineIntel" string
198 	return (output[1] /*EBX*/ == 0x756e6547) &&
199 		(output[2] /*ECX*/ == 0x6c65746e) &&
200 		(output[3] /*EDX*/ == 0x49656e69);
201 }
202 
IsAMD(const word32 output[4])203 static inline bool IsAMD(const word32 output[4])
204 {
205 	// This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
206 	return (output[1] /*EBX*/ == 0x68747541) &&
207 		(output[2] /*ECX*/ == 0x444D4163) &&
208 		(output[3] /*EDX*/ == 0x69746E65);
209 }
210 
IsVIA(const word32 output[4])211 static inline bool IsVIA(const word32 output[4])
212 {
213 	// This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
214 	return (output[1] /*EBX*/ == 0x746e6543) &&
215 		(output[2] /*ECX*/ == 0x736c7561) &&
216 		(output[3] /*EDX*/ == 0x48727561);
217 }
218 
219 #if HAVE_GCC_CONSTRUCTOR1
DetectX86Features()220 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
221 #elif HAVE_GCC_CONSTRUCTOR0
222 void __attribute__ ((constructor)) DetectX86Features()
223 #else
224 void DetectX86Features()
225 #endif
226 {
227 	word32 cpuid[4], cpuid1[4];
228 	if (!CpuId(0, cpuid))
229 		return;
230 	if (!CpuId(1, cpuid1))
231 		return;
232 
233 	g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
234 	if ((cpuid1[3] & (1 << 26)) != 0)
235 		g_hasSSE2 = TrySSE2();
236 	g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
237 	g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
238 	g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
239 	g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
240 
241 	if ((cpuid1[3] & (1 << 25)) != 0)
242 		g_hasISSE = true;
243 	else
244 	{
245 		word32 cpuid2[4];
246 		CpuId(0x080000000, cpuid2);
247 		if (cpuid2[0] >= 0x080000001)
248 		{
249 			CpuId(0x080000001, cpuid2);
250 			g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
251 		}
252 	}
253 
254 	if (IsIntel(cpuid))
255 	{
256 		static const unsigned int RDRAND_FLAG = (1 << 30);
257 		static const unsigned int RDSEED_FLAG = (1 << 18);
258 		static const unsigned int    SHA_FLAG = (1 << 29);
259 
260 		g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
261 		g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
262 		g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
263 
264 		if (cpuid[0] /*EAX*/ >= 7)
265 		{
266 			word32 cpuid3[4];
267 			if (CpuId(7, cpuid3))
268 			{
269 				g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
270 				g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG);
271 			}
272 		}
273 	}
274 	else if (IsAMD(cpuid))
275 	{
276 		static const unsigned int RDRAND_FLAG = (1 << 30);
277 
278 		CpuId(0x01, cpuid);
279 		g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
280 
281 		CpuId(0x80000005, cpuid);
282 		g_cacheLineSize = GETBYTE(cpuid[2], 0);
283 	}
284 	else if (IsVIA(cpuid))
285 	{
286 		static const unsigned int  RNG_FLAGS = (0x3 << 2);
287 		static const unsigned int  ACE_FLAGS = (0x3 << 6);
288 		static const unsigned int ACE2_FLAGS = (0x3 << 8);
289 		static const unsigned int  PHE_FLAGS = (0x3 << 10);
290 		static const unsigned int  PMM_FLAGS = (0x3 << 12);
291 
292 		CpuId(0xC0000000, cpuid);
293 		if (cpuid[0] >= 0xC0000001)
294 		{
295 			// Extended features available
296 			CpuId(0xC0000001, cpuid);
297 			g_hasPadlockRNG  = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
298 			g_hasPadlockACE  = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
299 			g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
300 			g_hasPadlockPHE  = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
301 			g_hasPadlockPMM  = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
302 		}
303 	}
304 
305 	if (!g_cacheLineSize)
306 		g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
307 
308 	*((volatile bool*)&g_x86DetectionDone) = true;
309 }
310 
311 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
312 
313 // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
314 //   Attempting to run the code results in a SIGILL and termination.
315 //
316 //     #if defined(__arm64__) || defined(__aarch64__)
317 //	     word64 caps = 0;  // Read ID_AA64ISAR0_EL1
318 //	     __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
319 //     #elif defined(__arm__) || defined(__aarch32__)
320 //	     word32 caps = 0;  // Read ID_ISAR5_EL1
321 //	     __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
322 //     #endif
323 //
// The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
325 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
326 //
327 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
328 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
329 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
330 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
331 
332 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
	// One jump buffer per probe; each is armed with setjmp() by the
	// matching Try*() function before it runs the instructions under test.
	static jmp_buf s_jmpNoNEON;
	static jmp_buf s_jmpNoPMULL;
	static jmp_buf s_jmpNoCRC32;
	static jmp_buf s_jmpNoAES;
	static jmp_buf s_jmpNoSHA1;
	static jmp_buf s_jmpNoSHA2;

	// SIGILL handlers: each unwinds to the setjmp() site of its probe.
	static void SigIllHandlerNEON(int /*signum*/)
	{
		longjmp(s_jmpNoNEON, 1);
	}

	static void SigIllHandlerPMULL(int /*signum*/)
	{
		longjmp(s_jmpNoPMULL, 1);
	}

	static void SigIllHandlerCRC32(int /*signum*/)
	{
		longjmp(s_jmpNoCRC32, 1);
	}

	static void SigIllHandlerAES(int /*signum*/)
	{
		longjmp(s_jmpNoAES, 1);
	}

	static void SigIllHandlerSHA1(int /*signum*/)
	{
		longjmp(s_jmpNoSHA1, 1);
	}

	static void SigIllHandlerSHA2(int /*signum*/)
	{
		longjmp(s_jmpNoSHA2, 1);
	}
}
371 #endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
372 
// Probes for NEON by executing a few NEON load/lane intrinsics and watching
// for an illegal-instruction fault.
//
// Returns false when NEON intrinsics are not compiled in, when the probe
// faults, or when the probe could not be set up; otherwise returns the
// value computed from the probe's lanes.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32_t v1[4] = {1,1,1,1};
		uint32x4_t x1 = vld1q_u32(v1);
		uint64_t v2[2] = {1,1};
		uint64x2_t x2 = vld1q_u64(v2);

		uint32x4_t x3 = vdupq_n_u32(2);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
		uint64x2_t x4 = vdupq_n_u64(2);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

		result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoNEON))
		result = false;
	else
	{
		uint32_t v1[4] = {1,1,1,1};
		uint32x4_t x1 = vld1q_u32(v1);
		uint64_t v2[2] = {1,1};
		uint64x2_t x2 = vld1q_u64(v2);

		uint32x4_t x3 = {0,0,0,0};
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
		uint64x2_t x4 = {0,0};
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
440 
// Probes for the polynomial multiply instructions by running vmull_p64 and
// vmull_high_p64 on fixed inputs and comparing against the expected
// products, while watching for an illegal-instruction fault.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults, when it could not be set up, or when any product
// differs from the expected value.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		const poly64_t a1={2}, b1={3};
		const poly64x2_t a2={4,5}, b2={6,7};
		const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);
		const poly128_t r3 = vmull_high_p64(a3, b3);

		// Also see https://github.com/weidai11/cryptopp/issues/233.
		const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // {6,0}
		const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // {0x1b,0}
		const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // {bignum,bignum}

		result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
			vgetq_lane_u64(t2,1) == 0x00 &&	vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoPMULL))
		result = false;
	else
	{
		const poly64_t a1={2}, b1={3};
		const poly64x2_t a2={4,5}, b2={6,7};
		const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);
		const poly128_t r3 = vmull_high_p64(a3, b3);

		// Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
		const uint64x2_t& t1 = (uint64x2_t)(r1);  // {6,0}
		const uint64x2_t& t2 = (uint64x2_t)(r2);  // {0x1b,0}
		const uint64x2_t& t3 = (uint64x2_t)(r3);  // {bignum,bignum}

		result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
			vgetq_lane_u64(t2,1) == 0x00 &&	vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
511 
// Probes for the CRC32 instructions by running __crc32cw, __crc32ch and
// __crc32cb and watching for an illegal-instruction fault.
//
// Returns false when the CRC32 intrinsics are not compiled in, when the
// probe faults, or when it could not be set up; otherwise returns whether
// the accumulated checksum is non-zero.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		word32 w=0, x=1; word16 y=2; byte z=3;
		w = __crc32cw(w,x);
		w = __crc32ch(w,y);
		w = __crc32cb(w,z);

		result = !!w;
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoCRC32))
		result = false;
	else
	{
		word32 w=0, x=1; word16 y=2; byte z=3;
		w = __crc32cw(w,x);
		w = __crc32ch(w,y);
		w = __crc32cb(w,z);

		// Hack... GCC optimizes away the code and returns true
		result = !!w;
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
565 
// Probes for the AES instructions by running vaeseq_u8 and vaesdq_u8 on
// all-zero data and key, watching for an illegal-instruction fault.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults, or when it could not be set up; otherwise returns
// whether the sampled output lanes are non-zero.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		// AES encrypt and decrypt
		uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
		uint8x16_t r1 = vaeseq_u8(data, key);
		uint8x16_t r2 = vaesdq_u8(data, key);

		result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoAES))
		result = false;
	else
	{
		// AES encrypt and decrypt
		uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
		uint8x16_t r1 = vaeseq_u8(data, key);
		uint8x16_t r2 = vaesdq_u8(data, key);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
618 
// Probes for the SHA-1 instructions by running the vsha1* intrinsics on
// fixed inputs and watching for an illegal-instruction fault.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults, or when it could not be set up; otherwise returns
// whether the sampled output lanes are non-zero.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
		uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
		uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
		uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
		uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoSHA1))
		result = false;
	else
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
		uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
		uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
		uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
		uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
678 
// Probes for the SHA-256 instructions by running the vsha256* intrinsics on
// fixed inputs and watching for an illegal-instruction fault.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults, or when it could not be set up; otherwise returns
// whether the sampled output lanes are non-zero.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
		uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
		uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
		uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	// Route SIGILL to a handler that longjmp()s back here if the probe faults.
	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
	if (oldHandler == SIG_ERR)
		return false;

	// Remember the current signal mask so it can be put back after the probe.
	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// Do not leave the probe handler installed on this failure path.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoSHA2))
		result = false;
	else
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
		uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
		uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
		uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
	}

	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	// Reinstate whatever SIGILL handler was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
736 
737 #if HAVE_GCC_CONSTRUCTOR1
DetectArmFeatures()738 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
739 #elif HAVE_GCC_CONSTRUCTOR0
740 void __attribute__ ((constructor)) DetectArmFeatures()
741 #else
742 void DetectArmFeatures()
743 #endif
744 {
745 	g_hasNEON = TryNEON();
746 	g_hasPMULL = TryPMULL();
747 	g_hasCRC32 = TryCRC32();
748 	g_hasAES = TryAES();
749 	g_hasSHA1 = TrySHA1();
750 	g_hasSHA2 = TrySHA2();
751 
752 	*((volatile bool*)&g_ArmDetectionDone) = true;
753 }
754 
755 #endif
756 
757 NAMESPACE_END
758 
759 #endif
760