1 /* CpuArch.c -- CPU specific code
2 2021-07-13 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "CpuArch.h"
7 
8 #ifdef MY_CPU_X86_OR_AMD64
9 
10 #if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
11 #define USE_ASM
12 #endif
13 
14 #if !defined(USE_ASM) && _MSC_VER >= 1500
15 #include <intrin.h>
16 #endif
17 
18 #if defined(USE_ASM) && !defined(MY_CPU_AMD64)
/*
  CheckFlag() tests whether the bit(s) given in (flag) can be toggled in the
  CPU flags register.

  Sequence performed by both asm variants:
    1) read the flags register and keep a copy of the original value,
    2) XOR the copy with (flag) and write the result back to the flags register,
    3) read the flags register again and XOR it with the original value,
       which leaves only the bits that really changed,
    4) restore the original flags value,
    5) AND (flag) with the changed-bits mask.

  Returns: (flag) restricted to the bits that could be toggled;
           0 means none of the requested bits could be changed.
*/
CheckFlag(UInt32 flag)19 static UInt32 CheckFlag(UInt32 flag)
20 {
21   #ifdef _MSC_VER
/* MSVC inline asm: EAX = working copy of flags, EDX = original flags */
22   __asm pushfd;
23   __asm pop EAX;
24   __asm mov EDX, EAX;
/* try to flip the requested bits */
25   __asm xor EAX, flag;
26   __asm push EAX;
27   __asm popfd;
/* read flags back; EAX ^ EDX = bits that actually changed */
28   __asm pushfd;
29   __asm pop EAX;
30   __asm xor EAX, EDX;
/* restore the original flags */
31   __asm push EDX;
32   __asm popfd;
33   __asm and flag, EAX;
34   #else
/* GCC-style inline asm: same sequence; (flag) is passed in and returned via ECX ("c") */
35   __asm__ __volatile__ (
36     "pushf\n\t"
37     "pop  %%EAX\n\t"
38     "movl %%EAX,%%EDX\n\t"
39     "xorl %0,%%EAX\n\t"
40     "push %%EAX\n\t"
41     "popf\n\t"
42     "pushf\n\t"
43     "pop  %%EAX\n\t"
44     "xorl %%EDX,%%EAX\n\t"
45     "push %%EDX\n\t"
46     "popf\n\t"
47     "andl %%EAX, %0\n\t":
48     "=c" (flag) : "c" (flag) :
/* EAX and EDX are used as scratch registers by the sequence above */
49     "%eax", "%edx");
50   #endif
51   return flag;
52 }
53 #define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
54 #else
55 #define CHECK_CPUID_IS_SUPPORTED
56 #endif
57 
58 #ifndef USE_ASM
59   #ifdef _MSC_VER
60     #if _MSC_VER >= 1600
61       #define MY__cpuidex  __cpuidex
62     #else
63 
/*
 __cpuid (function == 4) requires the subfunction number in ECX.
  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
   __cpuid() in new MSVC clears ECX.
   __cpuid() in old MSVC (14.00) doesn't clear ECX.
 We can still use __cpuid for low (function) values that don't require ECX,
 but __cpuid() in old MSVC will be incorrect for some function values, e.g. (function == 4).
 So for old MSVC we use a hack to pass (subFunction) to the cpuid instruction in the ECX register:
 ECX holds the first parameter of a FAST_CALL / NO_INLINE function,
 so the caller of MY__cpuidex_HACK() sets ECX to subFunction, and because
 old MSVC's __cpuid() doesn't change ECX, the cpuid instruction receives the (subFunction) value.

 DON'T remove MY_NO_INLINE and MY_FAST_CALL from MY__cpuidex_HACK() !!!
*/
78 
/*
  MY__cpuidex_HACK() : replacement for __cpuidex() on old MSVC (see the note preceding this function).
    subFunction - not referenced in the body (hence UNUSED_VAR); it is the first
                  parameter on purpose, so that the calling convention described in
                  the note places it where the cpuid instruction expects it.
    CPUInfo     - output array passed straight to __cpuid().
    function    - cpuid function number passed straight to __cpuid().
*/
79 static
80 MY_NO_INLINE
MY__cpuidex_HACK(UInt32 subFunction,int * CPUInfo,UInt32 function)81 void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
82 {
83   UNUSED_VAR(subFunction);
84   __cpuid(CPUInfo, function);
85 }
86 
87       #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
88       #pragma message("======== MY__cpuidex_HACK WAS USED ========")
89     #endif
90   #else
91      #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
92      #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
93   #endif
94 #endif
95 
96 
97 
98 
/*
  MyCPUID() executes the cpuid instruction for the given (function) number and
  stores the resulting EAX, EBX, ECX, EDX values in *a, *b, *c, *d.
  In every variant below ECX is set to 0 before cpuid is executed
  (xor ECX / "c"(0) / third argument of MY__cpuidex), so the subfunction is always 0.

  Three implementations are selected by the preprocessor:
    - USE_ASM + _MSC_VER : MSVC inline asm,
    - USE_ASM otherwise  : GCC-style inline asm,
    - no USE_ASM         : compiler intrinsic via MY__cpuidex().
*/
MyCPUID(UInt32 function,UInt32 * a,UInt32 * b,UInt32 * c,UInt32 * d)99 void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
100 {
101   #ifdef USE_ASM
102 
103   #ifdef _MSC_VER
104 
/* results are collected in locals first and copied to the output pointers afterwards */
105   UInt32 a2, b2, c2, d2;
106   __asm xor EBX, EBX;
107   __asm xor ECX, ECX;
108   __asm xor EDX, EDX;
109   __asm mov EAX, function;
110   __asm cpuid;
111   __asm mov a2, EAX;
112   __asm mov b2, EBX;
113   __asm mov c2, ECX;
114   __asm mov d2, EDX;
115 
116   *a = a2;
117   *b = b2;
118   *c = c2;
119   *d = d2;
120 
121   #else
122 
123   __asm__ __volatile__ (
/*
  PIC builds: RBX/EBX is saved in RDI/EDI before cpuid and swapped back after it,
  so the EBX result is delivered through the "D" register and RBX/EBX keeps its
  original value. NOTE(review): presumably because the compiler reserves EBX
  in position-independent code - confirm for the targeted toolchains.
*/
124   #if defined(MY_CPU_AMD64) && defined(__PIC__)
125     "mov %%rbx, %%rdi;"
126     "cpuid;"
127     "xchg %%rbx, %%rdi;"
128     : "=a" (*a) ,
129       "=D" (*b) ,
130   #elif defined(MY_CPU_X86) && defined(__PIC__)
131     "mov %%ebx, %%edi;"
132     "cpuid;"
133     "xchgl %%ebx, %%edi;"
134     : "=a" (*a) ,
135       "=D" (*b) ,
136   #else
/* non-PIC: EBX is read directly */
137     "cpuid"
138     : "=a" (*a) ,
139       "=b" (*b) ,
140   #endif
141       "=c" (*c) ,
142       "=d" (*d)
/* inputs: EAX = function (tied to output operand 0), ECX = 0 */
143     : "0" (function), "c"(0) ) ;
144 
145   #endif
146 
147   #else
148 
/* intrinsic path: CPUInfo[0..3] receive EAX, EBX, ECX, EDX */
149   int CPUInfo[4];
150 
151   MY__cpuidex(CPUInfo, (int)function, 0);
152 
153   *a = (UInt32)CPUInfo[0];
154   *b = (UInt32)CPUInfo[1];
155   *c = (UInt32)CPUInfo[2];
156   *d = (UInt32)CPUInfo[3];
157 
158   #endif
159 }
160 
x86cpuid_CheckAndRead(Cx86cpuid * p)161 BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
162 {
163   CHECK_CPUID_IS_SUPPORTED
164   MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
165   MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
166   return True;
167 }
168 
/*
  Known CPU vendor signatures, one row per vendor, compared word-by-word with
  p->vendor[0..2] in x86cpuid_GetFirm(). Each UInt32 holds 4 ASCII characters,
  lowest byte first. The row index is the value returned by x86cpuid_GetFirm().
  NOTE(review): the row order presumably matches the CPU_FIRM_* constants - confirm in CpuArch.h.
*/
169 static const UInt32 kVendors[][3] =
170 {
/* "Genu" "ineI" "ntel" */
171   { 0x756E6547, 0x49656E69, 0x6C65746E},
/* "Auth" "enti" "cAMD" */
172   { 0x68747541, 0x69746E65, 0x444D4163},
/* "Cent" "aurH" "auls" */
173   { 0x746E6543, 0x48727561, 0x736C7561}
174 };
175 
x86cpuid_GetFirm(const Cx86cpuid * p)176 int x86cpuid_GetFirm(const Cx86cpuid *p)
177 {
178   unsigned i;
179   for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
180   {
181     const UInt32 *v = kVendors[i];
182     if (v[0] == p->vendor[0] &&
183         v[1] == p->vendor[1] &&
184         v[2] == p->vendor[2])
185       return (int)i;
186   }
187   return -1;
188 }
189 
CPU_Is_InOrder()190 BoolInt CPU_Is_InOrder()
191 {
192   Cx86cpuid p;
193   int firm;
194   UInt32 family, model;
195   if (!x86cpuid_CheckAndRead(&p))
196     return True;
197 
198   family = x86cpuid_GetFamily(p.ver);
199   model = x86cpuid_GetModel(p.ver);
200 
201   firm = x86cpuid_GetFirm(&p);
202 
203   switch (firm)
204   {
205     case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
206         /* In-Order Atom CPU */
207            model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
208         || model == 0x26  /* 45 nm, Z6xx */
209         || model == 0x27  /* 32 nm, Z2460 */
210         || model == 0x35  /* 32 nm, Z2760 */
211         || model == 0x36  /* 32 nm, N2xxx, D2xxx */
212         )));
213     case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
214     case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
215   }
216   return True;
217 }
218 
219 #if !defined(MY_CPU_AMD64) && defined(_WIN32)
220 #include <Windows.h>
CPU_Sys_Is_SSE_Supported()221 static BoolInt CPU_Sys_Is_SSE_Supported()
222 {
223   OSVERSIONINFO vi;
224   vi.dwOSVersionInfoSize = sizeof(vi);
225   if (!GetVersionEx(&vi))
226     return False;
227   return (vi.dwMajorVersion >= 5);
228 }
229 #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
230 #else
231 #define CHECK_SYS_SSE_SUPPORT
232 #endif
233 
234 
X86_CPUID_ECX_Get_Flags()235 static UInt32 X86_CPUID_ECX_Get_Flags()
236 {
237   Cx86cpuid p;
238   CHECK_SYS_SSE_SUPPORT
239   if (!x86cpuid_CheckAndRead(&p))
240     return 0;
241   return p.c;
242 }
243 
CPU_IsSupported_AES()244 BoolInt CPU_IsSupported_AES()
245 {
246   return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
247 }
248 
CPU_IsSupported_SSSE3()249 BoolInt CPU_IsSupported_SSSE3()
250 {
251   return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
252 }
253 
CPU_IsSupported_SSE41()254 BoolInt CPU_IsSupported_SSE41()
255 {
256   return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
257 }
258 
CPU_IsSupported_SHA()259 BoolInt CPU_IsSupported_SHA()
260 {
261   Cx86cpuid p;
262   CHECK_SYS_SSE_SUPPORT
263   if (!x86cpuid_CheckAndRead(&p))
264     return False;
265 
266   if (p.maxFunc < 7)
267     return False;
268   {
269     UInt32 d[4] = { 0 };
270     MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
271     return (d[1] >> 29) & 1;
272   }
273 }
274 
275 // #include <stdio.h>
276 
277 #ifdef _WIN32
278 #include <Windows.h>
279 #endif
280 
CPU_IsSupported_AVX2()281 BoolInt CPU_IsSupported_AVX2()
282 {
283   Cx86cpuid p;
284   CHECK_SYS_SSE_SUPPORT
285 
286   #ifdef _WIN32
287   #define MY__PF_XSAVE_ENABLED  17
288   if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
289     return False;
290   #endif
291 
292   if (!x86cpuid_CheckAndRead(&p))
293     return False;
294   if (p.maxFunc < 7)
295     return False;
296   {
297     UInt32 d[4] = { 0 };
298     MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
299     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
300     return 1
301       & (d[1] >> 5); // avx2
302   }
303 }
304 
CPU_IsSupported_VAES_AVX2()305 BoolInt CPU_IsSupported_VAES_AVX2()
306 {
307   Cx86cpuid p;
308   CHECK_SYS_SSE_SUPPORT
309 
310   #ifdef _WIN32
311   #define MY__PF_XSAVE_ENABLED  17
312   if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
313     return False;
314   #endif
315 
316   if (!x86cpuid_CheckAndRead(&p))
317     return False;
318   if (p.maxFunc < 7)
319     return False;
320   {
321     UInt32 d[4] = { 0 };
322     MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
323     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
324     return 1
325       & (d[1] >> 5) // avx2
326       // & (d[1] >> 31) // avx512vl
327       & (d[2] >> 9); // vaes // VEX-256/EVEX
328   }
329 }
330 
CPU_IsSupported_PageGB()331 BoolInt CPU_IsSupported_PageGB()
332 {
333   Cx86cpuid cpuid;
334   if (!x86cpuid_CheckAndRead(&cpuid))
335     return False;
336   {
337     UInt32 d[4] = { 0 };
338     MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
339     if (d[0] < 0x80000001)
340       return False;
341   }
342   {
343     UInt32 d[4] = { 0 };
344     MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
345     return (d[3] >> 26) & 1;
346   }
347 }
348 
349 
350 #elif defined(MY_CPU_ARM_OR_ARM64)
351 
352 #ifdef _WIN32
353 
354 #include <Windows.h>
355 
CPU_IsSupported_CRC32()356 BoolInt CPU_IsSupported_CRC32()  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
CPU_IsSupported_CRYPTO()357 BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
CPU_IsSupported_NEON()358 BoolInt CPU_IsSupported_NEON()   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
359 
360 #else
361 
362 #if defined(__APPLE__)
363 
364 /*
365 #include <stdio.h>
366 #include <string.h>
367 static void Print_sysctlbyname(const char *name)
368 {
369   size_t bufSize = 256;
370   char buf[256];
371   int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
372   {
373     int i;
374     printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
375     for (i = 0; i < 20; i++)
376       printf(" %2x", (unsigned)(Byte)buf[i]);
377 
378   }
379 }
380 */
381 
My_sysctlbyname_Get_BoolInt(const char * name)382 static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
383 {
384   UInt32 val = 0;
385   if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
386     return 1;
387   return 0;
388 }
389 
390   /*
391   Print_sysctlbyname("hw.pagesize");
392   Print_sysctlbyname("machdep.cpu.brand_string");
393   */
394 
CPU_IsSupported_CRC32(void)395 BoolInt CPU_IsSupported_CRC32(void)
396 {
397   return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
398 }
399 
CPU_IsSupported_NEON(void)400 BoolInt CPU_IsSupported_NEON(void)
401 {
402   return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
403 }
404 
405 #ifdef MY_CPU_ARM64
406 #define APPLE_CRYPTO_SUPPORT_VAL 1
407 #else
408 #define APPLE_CRYPTO_SUPPORT_VAL 0
409 #endif
410 
CPU_IsSupported_SHA1(void)411 BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
CPU_IsSupported_SHA2(void)412 BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
CPU_IsSupported_AES(void)413 BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
414 
415 
416 #else // __APPLE__
417 
418 #include <sys/auxv.h>
419 
420 #if defined(__FreeBSD__)
get_hwcap()421 static UInt64 get_hwcap() {
422   unsigned long hwcap;
423   if(elf_aux_info(AT_HWCAP, &hwcap, sizeof(unsigned long)) != 0) {
424         return(0);
425   }
426   return hwcap;
427 }
428 
CPU_IsSupported_CRC32(void)429 BoolInt CPU_IsSupported_CRC32(void) { return get_hwcap() & HWCAP_CRC32; }
CPU_IsSupported_NEON(void)430 BoolInt CPU_IsSupported_NEON(void) { return 1; }
CPU_IsSupported_SHA1(void)431 BoolInt CPU_IsSupported_SHA1(void){ return get_hwcap() & HWCAP_SHA1; }
CPU_IsSupported_SHA2(void)432 BoolInt CPU_IsSupported_SHA2(void) { return get_hwcap() & HWCAP_SHA2; }
CPU_IsSupported_AES(void)433 BoolInt CPU_IsSupported_AES(void) { return get_hwcap() & HWCAP_AES; }
434 
435 #else // __FreeBSD__
436 
/*
  Feature checks for the remaining ARM targets (not Windows, not Apple, not FreeBSD).
  The CPU_IsSupported_*() functions in this section are generated by macros.
  USE_HWCAP is defined unconditionally here, so the getauxval() variants are the ones compiled;
  the "#else" variants (which return 0) are compiled only if that define is removed.
*/
437 #define USE_HWCAP
438 
439 #ifdef USE_HWCAP
440 
441 #include <asm/hwcap.h>
442 
/*
  MY_HWCAP_CHECK_FUNC_2(name1, name2) defines CPU_IsSupported_<name1>(),
  which returns 1 if the HWCAP_<name2> bit is set in getauxval(AT_HWCAP), else 0.
*/
443   #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
444   BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP)  & (HWCAP_  ## name2)) ? 1 : 0; }
445 
446 #ifdef MY_CPU_ARM64
/* ARM64: MY_HWCAP_CHECK_FUNC(name) tests HWCAP_<name> in AT_HWCAP; NEON is mapped to the ASIMD bit */
447   #define MY_HWCAP_CHECK_FUNC(name) \
448   MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON,ASIMD)449   MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
450 // MY_HWCAP_CHECK_FUNC (ASIMD)
451 #elif defined(MY_CPU_ARM)
/* 32-bit ARM: MY_HWCAP_CHECK_FUNC(name) tests HWCAP2_<name> in AT_HWCAP2; NEON is tested as HWCAP_NEON in AT_HWCAP */
452   #define MY_HWCAP_CHECK_FUNC(name) \
453   BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
454   MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
455 #endif
456 
457 #else // USE_HWCAP
458 
/* fallback without HWCAP: every generated CPU_IsSupported_<name>() returns 0 */
459   #define MY_HWCAP_CHECK_FUNC(name) \
460   BoolInt CPU_IsSupported_ ## name() { return 0; }
461   MY_HWCAP_CHECK_FUNC(NEON)
462 
463 #endif // USE_HWCAP
464 
/* generate CPU_IsSupported_CRC32 / SHA1 / SHA2 / AES with whichever MY_HWCAP_CHECK_FUNC was selected above */
465 MY_HWCAP_CHECK_FUNC (CRC32)
466 MY_HWCAP_CHECK_FUNC (SHA1)
467 MY_HWCAP_CHECK_FUNC (SHA2)
468 MY_HWCAP_CHECK_FUNC (AES)
469 
470 #endif // FreeBSD
471 #endif // __APPLE__
472 #endif // _WIN32
473 
474 #endif // MY_CPU_ARM_OR_ARM64
475 
476 
477 
478 #ifdef __APPLE__
479 
480 #include <sys/sysctl.h>
481 
/*
  My_sysctlbyname_Get() reads the sysctl entry (name) into (buf).
    bufSize - in: size of (buf); it is passed to sysctlbyname() as the length pointer.
  No new value is supplied to sysctlbyname() (NULL, 0).
  Returns: the sysctlbyname() result.
*/
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  const int status = sysctlbyname(name, buf, bufSize, NULL, 0);
  return status;
}
486 
/*
  My_sysctlbyname_Get_UInt32() reads the sysctl entry (name) into (*val).
  Returns:
    - the My_sysctlbyname_Get() result if it is non-zero,
    - EFAULT if the call succeeded but the reported size differs from sizeof(*val),
    - 0 otherwise.
*/
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t len = sizeof(*val);
  const int status = My_sysctlbyname_Get(name, val, &len);

  if (status != 0)
    return status;

  return (len == sizeof(*val)) ? 0 : EFAULT;
}
495 
496 #endif
497