1 //===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86 and
12 //  __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef __has_attribute
17 #define __has_attribute(attr) 0
18 #endif
19 
20 #if __has_attribute(constructor)
21 #if __GNUC__ >= 9
22 // Ordinarily init priorities below 101 are disallowed as they are reserved for the
23 // implementation. However, we are the implementation, so silence the diagnostic,
24 // since it doesn't apply to us.
25 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
26 #endif
27 // We're choosing init priority 90 to force our constructors to run before any
28 // constructors in the end user application (starting at priority 101). This value
29 // matches the libgcc choice for the same functions.
30 #define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
31 #else
32 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
33 // this runs during initialization.
34 #define CONSTRUCTOR_ATTRIBUTE
35 #endif
36 
37 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
38      defined(_M_X64)) &&                                                       \
39     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
40 
41 #include <assert.h>
42 
43 #define bool int
44 #define true 1
45 #define false 0
46 
47 #ifdef _MSC_VER
48 #include <intrin.h>
49 #endif
50 
// CPUID leaf 0 vendor signatures: the 32-bit value returned in EBX,
// interpreted as a little-endian 4-character string.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};

// Values stored in __cpu_model.__cpu_vendor.
// NOTE(review): these enumerators appear to form an ABI shared with libgcc's
// __builtin_cpu_is support (see the libgcc note above) — do not reorder or
// renumber; confirm against gcc's config/i386/cpuinfo.h.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};

// Values stored in __cpu_model.__cpu_type. New entries are appended at the
// end so existing values never change (presumed ABI — see note above).
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  CPU_TYPE_MAX
};

// Values stored in __cpu_model.__cpu_subtype. Same append-only discipline as
// ProcessorTypes above.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  CPU_SUBTYPE_MAX
};

// Bit positions in the feature bit-set. Bits 0-31 land in
// __cpu_model.__cpu_features[0], bits 32 and up in __cpu_features2 (see
// __cpu_indicator_init below).
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX
};
160 
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when it is safe to execute the CPUID instruction. On x86-64
// CPUID always exists; on i386 it is probed by toggling the EFLAGS ID bit.
static bool isCpuIdSupported(void) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Flip bit 21 (ID) of EFLAGS and read it back: if the new value sticks
  // (EAX != saved ECX), the CPU supports CPUID.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  // Other compilers (e.g. MSVC): assume CPUID is available.
  return true;
}
192 
193 // This code is copied from lib/Support/Host.cpp.
194 // Changes to either file should be mirrored in the other.
195 
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
/// On success, *rEAX/*rEBX/*rECX/*rEDX receive the registers produced by
/// CPUID leaf \p value; on failure they are left untouched.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same EBX-preservation dance as above, in 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
233 
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
/// Like getX86CpuIDAndInfo, but also loads \p subleaf into ECX before CPUID
/// (needed for leaves such as 0x7 that have subleaves).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
272 
// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns true (failure) when XGETBV cannot be issued; on success *rEAX and
// *rEDX receive the low and high 32 bits of XCR0 respectively.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
290 
// Decode the display family and display model from the EAX value returned by
// CPUID leaf 1. Per the CPUID encoding, the extended family field (bits
// 20-27) is added only when the base family is 0xF, and the extended model
// field (bits 16-19) extends the model only for base families 6 and 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  unsigned FullFamily = BaseFamily;
  unsigned FullModel = (EAX >> 4) & 0xf; // Bits 4 - 7
  if (BaseFamily == 0xf)
    FullFamily += (EAX >> 20) & 0xff; // Bits 20 - 27
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  *Family = FullFamily;
  *Model = FullModel;
}
303 
304 static const char *
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)305 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
306                                 const unsigned *Features,
307                                 unsigned *Type, unsigned *Subtype) {
308 #define testFeature(F)                                                         \
309   (Features[F / 32] & (1 << (F % 32))) != 0
310 
311   // We select CPU strings to match the code in Host.cpp, but we don't use them
312   // in compiler-rt.
313   const char *CPU = 0;
314 
315   switch (Family) {
316   case 6:
317     switch (Model) {
318     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
319                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
320                // mobile processor, Intel Core 2 Extreme processor, Intel
321                // Pentium Dual-Core processor, Intel Xeon processor, model
322                // 0Fh. All processors are manufactured using the 65 nm process.
323     case 0x16: // Intel Celeron processor model 16h. All processors are
324                // manufactured using the 65 nm process
325       CPU = "core2";
326       *Type = INTEL_CORE2;
327       break;
328     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
329                // 17h. All processors are manufactured using the 45 nm process.
330                //
331                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
332     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
333                // the 45 nm process.
334       CPU = "penryn";
335       *Type = INTEL_CORE2;
336       break;
337     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
338                // processors are manufactured using the 45 nm process.
339     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
340                // As found in a Summer 2010 model iMac.
341     case 0x1f:
342     case 0x2e:              // Nehalem EX
343       CPU = "nehalem";
344       *Type = INTEL_COREI7;
345       *Subtype = INTEL_COREI7_NEHALEM;
346       break;
347     case 0x25: // Intel Core i7, laptop version.
348     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
349                // processors are manufactured using the 32 nm process.
350     case 0x2f: // Westmere EX
351       CPU = "westmere";
352       *Type = INTEL_COREI7;
353       *Subtype = INTEL_COREI7_WESTMERE;
354       break;
355     case 0x2a: // Intel Core i7 processor. All processors are manufactured
356                // using the 32 nm process.
357     case 0x2d:
358       CPU = "sandybridge";
359       *Type = INTEL_COREI7;
360       *Subtype = INTEL_COREI7_SANDYBRIDGE;
361       break;
362     case 0x3a:
363     case 0x3e:              // Ivy Bridge EP
364       CPU = "ivybridge";
365       *Type = INTEL_COREI7;
366       *Subtype = INTEL_COREI7_IVYBRIDGE;
367       break;
368 
369     // Haswell:
370     case 0x3c:
371     case 0x3f:
372     case 0x45:
373     case 0x46:
374       CPU = "haswell";
375       *Type = INTEL_COREI7;
376       *Subtype = INTEL_COREI7_HASWELL;
377       break;
378 
379     // Broadwell:
380     case 0x3d:
381     case 0x47:
382     case 0x4f:
383     case 0x56:
384       CPU = "broadwell";
385       *Type = INTEL_COREI7;
386       *Subtype = INTEL_COREI7_BROADWELL;
387       break;
388 
389     // Skylake:
390     case 0x4e:              // Skylake mobile
391     case 0x5e:              // Skylake desktop
392     case 0x8e:              // Kaby Lake mobile
393     case 0x9e:              // Kaby Lake desktop
394     case 0xa5:              // Comet Lake-H/S
395     case 0xa6:              // Comet Lake-U
396       CPU = "skylake";
397       *Type = INTEL_COREI7;
398       *Subtype = INTEL_COREI7_SKYLAKE;
399       break;
400 
401     // Rocketlake:
402     case 0xa7:
403       CPU = "rocketlake";
404       *Type = INTEL_COREI7;
405       *Subtype = INTEL_COREI7_ROCKETLAKE;
406       break;
407 
408     // Skylake Xeon:
409     case 0x55:
410       *Type = INTEL_COREI7;
411       if (testFeature(FEATURE_AVX512BF16)) {
412         CPU = "cooperlake";
413         *Subtype = INTEL_COREI7_COOPERLAKE;
414       } else if (testFeature(FEATURE_AVX512VNNI)) {
415         CPU = "cascadelake";
416         *Subtype = INTEL_COREI7_CASCADELAKE;
417       } else {
418         CPU = "skylake-avx512";
419         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
420       }
421       break;
422 
423     // Cannonlake:
424     case 0x66:
425       CPU = "cannonlake";
426       *Type = INTEL_COREI7;
427       *Subtype = INTEL_COREI7_CANNONLAKE;
428       break;
429 
430     // Icelake:
431     case 0x7d:
432     case 0x7e:
433       CPU = "icelake-client";
434       *Type = INTEL_COREI7;
435       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
436       break;
437 
438     // Tigerlake:
439     case 0x8c:
440     case 0x8d:
441       CPU = "tigerlake";
442       *Type = INTEL_COREI7;
443       *Subtype = INTEL_COREI7_TIGERLAKE;
444       break;
445 
446     // Alderlake:
447     case 0x97:
448     case 0x9a:
449     // Raptorlake:
450     case 0xb7:
451     // Meteorlake:
452     case 0xaa:
453     case 0xac:
454       CPU = "alderlake";
455       *Type = INTEL_COREI7;
456       *Subtype = INTEL_COREI7_ALDERLAKE;
457       break;
458 
459     // Icelake Xeon:
460     case 0x6a:
461     case 0x6c:
462       CPU = "icelake-server";
463       *Type = INTEL_COREI7;
464       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
465       break;
466 
467     // Emerald Rapids:
468     case 0xcf:
469     // Sapphire Rapids:
470     case 0x8f:
471       CPU = "sapphirerapids";
472       *Type = INTEL_COREI7;
473       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
474       break;
475 
476     // Granite Rapids:
477     case 0xae:
478     case 0xad:
479       CPU = "graniterapids";
480       *Type = INTEL_COREI7;
481       *Subtype = INTEL_COREI7_GRANITERAPIDS;
482       break;
483 
484     case 0x1c: // Most 45 nm Intel Atom processors
485     case 0x26: // 45 nm Atom Lincroft
486     case 0x27: // 32 nm Atom Medfield
487     case 0x35: // 32 nm Atom Midview
488     case 0x36: // 32 nm Atom Midview
489       CPU = "bonnell";
490       *Type = INTEL_BONNELL;
491       break;
492 
493     // Atom Silvermont codes from the Intel software optimization guide.
494     case 0x37:
495     case 0x4a:
496     case 0x4d:
497     case 0x5a:
498     case 0x5d:
499     case 0x4c: // really airmont
500       CPU = "silvermont";
501       *Type = INTEL_SILVERMONT;
502       break;
503     // Goldmont:
504     case 0x5c: // Apollo Lake
505     case 0x5f: // Denverton
506       CPU = "goldmont";
507       *Type = INTEL_GOLDMONT;
508       break; // "goldmont"
509     case 0x7a:
510       CPU = "goldmont-plus";
511       *Type = INTEL_GOLDMONT_PLUS;
512       break;
513     case 0x86:
514       CPU = "tremont";
515       *Type = INTEL_TREMONT;
516       break;
517 
518     // Sierraforest:
519     case 0xaf:
520       CPU = "sierraforest";
521       *Type = INTEL_SIERRAFOREST;
522       break;
523 
524     // Grandridge:
525     case 0xb6:
526       CPU = "grandridge";
527       *Type = INTEL_GRANDRIDGE;
528       break;
529 
530     case 0x57:
531       CPU = "knl";
532       *Type = INTEL_KNL;
533       break;
534 
535     case 0x85:
536       CPU = "knm";
537       *Type = INTEL_KNM;
538       break;
539 
540     default: // Unknown family 6 CPU.
541       break;
542     }
543     break;
544   default:
545     break; // Unknown.
546   }
547 
548   return CPU;
549 }
550 
551 static const char *
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)552 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
553                               const unsigned *Features,
554                               unsigned *Type, unsigned *Subtype) {
555   // We select CPU strings to match the code in Host.cpp, but we don't use them
556   // in compiler-rt.
557   const char *CPU = 0;
558 
559   switch (Family) {
560   case 16:
561     CPU = "amdfam10";
562     *Type = AMDFAM10H;
563     switch (Model) {
564     case 2:
565       *Subtype = AMDFAM10H_BARCELONA;
566       break;
567     case 4:
568       *Subtype = AMDFAM10H_SHANGHAI;
569       break;
570     case 8:
571       *Subtype = AMDFAM10H_ISTANBUL;
572       break;
573     }
574     break;
575   case 20:
576     CPU = "btver1";
577     *Type = AMD_BTVER1;
578     break;
579   case 21:
580     CPU = "bdver1";
581     *Type = AMDFAM15H;
582     if (Model >= 0x60 && Model <= 0x7f) {
583       CPU = "bdver4";
584       *Subtype = AMDFAM15H_BDVER4;
585       break; // 60h-7Fh: Excavator
586     }
587     if (Model >= 0x30 && Model <= 0x3f) {
588       CPU = "bdver3";
589       *Subtype = AMDFAM15H_BDVER3;
590       break; // 30h-3Fh: Steamroller
591     }
592     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
593       CPU = "bdver2";
594       *Subtype = AMDFAM15H_BDVER2;
595       break; // 02h, 10h-1Fh: Piledriver
596     }
597     if (Model <= 0x0f) {
598       *Subtype = AMDFAM15H_BDVER1;
599       break; // 00h-0Fh: Bulldozer
600     }
601     break;
602   case 22:
603     CPU = "btver2";
604     *Type = AMD_BTVER2;
605     break;
606   case 23:
607     CPU = "znver1";
608     *Type = AMDFAM17H;
609     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
610       CPU = "znver2";
611       *Subtype = AMDFAM17H_ZNVER2;
612       break; // 30h-3fh, 71h: Zen2
613     }
614     if (Model <= 0x0f) {
615       *Subtype = AMDFAM17H_ZNVER1;
616       break; // 00h-0Fh: Zen1
617     }
618     break;
619   case 25:
620     CPU = "znver3";
621     *Type = AMDFAM19H;
622     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
623       // Family 19h Models 00h-0Fh - Zen3
624       // Family 19h Models 20h-2Fh - Zen3
625       // Family 19h Models 30h-3Fh - Zen3
626       // Family 19h Models 40h-4Fh - Zen3+
627       // Family 19h Models 50h-5Fh - Zen3+
628       *Subtype = AMDFAM19H_ZNVER3;
629       break;
630     }
631     if ((Model >= 0x10 && Model <= 0x1f) ||
632         (Model >= 0x60 && Model <= 0x74) ||
633         (Model >= 0x78 && Model <= 0x7b) ||
634         (Model >= 0xA0 && Model <= 0xAf)) {
635       CPU = "znver4";
636       *Subtype = AMDFAM19H_ZNVER4;
637       break; //  "znver4"
638     }
639     break;
640   default:
641     break; // Unknown AMD CPU.
642   }
643 
644   return CPU;
645 }
646 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)647 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
648                                  unsigned *Features) {
649   unsigned EAX, EBX;
650 
651 #define setFeature(F)                                                          \
652   Features[F / 32] |= 1U << (F % 32)
653 
654   if ((EDX >> 15) & 1)
655     setFeature(FEATURE_CMOV);
656   if ((EDX >> 23) & 1)
657     setFeature(FEATURE_MMX);
658   if ((EDX >> 25) & 1)
659     setFeature(FEATURE_SSE);
660   if ((EDX >> 26) & 1)
661     setFeature(FEATURE_SSE2);
662 
663   if ((ECX >> 0) & 1)
664     setFeature(FEATURE_SSE3);
665   if ((ECX >> 1) & 1)
666     setFeature(FEATURE_PCLMUL);
667   if ((ECX >> 9) & 1)
668     setFeature(FEATURE_SSSE3);
669   if ((ECX >> 12) & 1)
670     setFeature(FEATURE_FMA);
671   if ((ECX >> 19) & 1)
672     setFeature(FEATURE_SSE4_1);
673   if ((ECX >> 20) & 1)
674     setFeature(FEATURE_SSE4_2);
675   if ((ECX >> 23) & 1)
676     setFeature(FEATURE_POPCNT);
677   if ((ECX >> 25) & 1)
678     setFeature(FEATURE_AES);
679 
680   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
681   // indicates that the AVX registers will be saved and restored on context
682   // switch, then we have full AVX support.
683   const unsigned AVXBits = (1 << 27) | (1 << 28);
684   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
685                 ((EAX & 0x6) == 0x6);
686 #if defined(__APPLE__)
687   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
688   // save the AVX512 context if we use AVX512 instructions, even the bit is not
689   // set right now.
690   bool HasAVX512Save = true;
691 #else
692   // AVX512 requires additional context to be saved by the OS.
693   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
694 #endif
695 
696   if (HasAVX)
697     setFeature(FEATURE_AVX);
698 
699   bool HasLeaf7 =
700       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
701 
702   if (HasLeaf7 && ((EBX >> 3) & 1))
703     setFeature(FEATURE_BMI);
704   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
705     setFeature(FEATURE_AVX2);
706   if (HasLeaf7 && ((EBX >> 8) & 1))
707     setFeature(FEATURE_BMI2);
708   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
709     setFeature(FEATURE_AVX512F);
710   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
711     setFeature(FEATURE_AVX512DQ);
712   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
713     setFeature(FEATURE_AVX512IFMA);
714   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
715     setFeature(FEATURE_AVX512PF);
716   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
717     setFeature(FEATURE_AVX512ER);
718   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
719     setFeature(FEATURE_AVX512CD);
720   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
721     setFeature(FEATURE_AVX512BW);
722   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
723     setFeature(FEATURE_AVX512VL);
724 
725   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
726     setFeature(FEATURE_AVX512VBMI);
727   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
728     setFeature(FEATURE_AVX512VBMI2);
729   if (HasLeaf7 && ((ECX >> 8) & 1))
730     setFeature(FEATURE_GFNI);
731   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
732     setFeature(FEATURE_VPCLMULQDQ);
733   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
734     setFeature(FEATURE_AVX512VNNI);
735   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
736     setFeature(FEATURE_AVX512BITALG);
737   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
738     setFeature(FEATURE_AVX512VPOPCNTDQ);
739 
740   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
741     setFeature(FEATURE_AVX5124VNNIW);
742   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
743     setFeature(FEATURE_AVX5124FMAPS);
744   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
745     setFeature(FEATURE_AVX512VP2INTERSECT);
746 
747   bool HasLeaf7Subleaf1 =
748       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
749   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
750     setFeature(FEATURE_AVX512BF16);
751 
752   unsigned MaxExtLevel;
753   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
754 
755   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
756                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
757   if (HasExtLeaf1 && ((ECX >> 6) & 1))
758     setFeature(FEATURE_SSE4_A);
759   if (HasExtLeaf1 && ((ECX >> 11) & 1))
760     setFeature(FEATURE_XOP);
761   if (HasExtLeaf1 && ((ECX >> 16) & 1))
762     setFeature(FEATURE_FMA4);
763 #undef setFeature
764 }
765 
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// Vendor/type/subtype/feature record filled in by __cpu_indicator_init.
// __cpu_vendor doubles as the "already initialized" flag: 0 means the
// constructor has not run yet.
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// Second 32-bit word of the feature bit-set (FEATURE_* values >= 32).
unsigned int __cpu_features2 = 0;

// A constructor function that sets __cpu_model and __cpu_features2 with
// the right values.  This needs to run only once.  This constructor is
// given the highest priority and it should run before constructors without
// the priority set.  However, it still runs after ifunc initializers and
// needs to be called explicitly there.
791 
// Populate __cpu_model and __cpu_features2 from CPUID. Idempotent: returns
// immediately once __cpu_vendor is non-zero. Returns 0 on success, -1 when
// CPUID is unavailable or reports no leaves beyond 0.
int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5;
  unsigned Vendor;
  unsigned Model, Family;
  // Feature bit-set; two 32-bit words (CPU_FEATURE_MAX <= 64).
  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};

  // This function needs to run just once.
  if (__cpu_model.__cpu_vendor)
    return 0;

  // Leaf 0 yields the highest supported leaf in EAX and the vendor signature
  // in EBX ("Genu"/"Auth").
  if (!isCpuIdSupported() ||
      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;
    return -1;
  }

  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);

  // Find available features.
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);

  // The split below assumes exactly two feature words: word 0 is exported in
  // __cpu_model, word 1 in __cpu_features2.
  assert((sizeof(Features)/sizeof(Features[0])) == 2);
  __cpu_model.__cpu_features[0] = Features[0];
  __cpu_features2 = Features[1];

  if (Vendor == SIG_INTEL) {
    // Get CPU type.
    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    // Get CPU type.
    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
  } else
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);

  return 0;
}
840 #elif defined(__aarch64__)
841 
842 #ifndef AT_HWCAP
843 #define AT_HWCAP 16
844 #endif
845 #ifndef HWCAP_CPUID
846 #define HWCAP_CPUID (1 << 11)
847 #endif
848 #ifndef HWCAP_FP
849 #define HWCAP_FP (1 << 0)
850 #endif
851 #ifndef HWCAP_ASIMD
852 #define HWCAP_ASIMD (1 << 1)
853 #endif
854 #ifndef HWCAP_AES
855 #define HWCAP_AES (1 << 3)
856 #endif
857 #ifndef HWCAP_PMULL
858 #define HWCAP_PMULL (1 << 4)
859 #endif
860 #ifndef HWCAP_SHA1
861 #define HWCAP_SHA1 (1 << 5)
862 #endif
863 #ifndef HWCAP_SHA2
864 #define HWCAP_SHA2 (1 << 6)
865 #endif
866 #ifndef HWCAP_ATOMICS
867 #define HWCAP_ATOMICS (1 << 8)
868 #endif
869 #ifndef HWCAP_FPHP
870 #define HWCAP_FPHP (1 << 9)
871 #endif
872 #ifndef HWCAP_ASIMDHP
873 #define HWCAP_ASIMDHP (1 << 10)
874 #endif
875 #ifndef HWCAP_ASIMDRDM
876 #define HWCAP_ASIMDRDM (1 << 12)
877 #endif
878 #ifndef HWCAP_JSCVT
879 #define HWCAP_JSCVT (1 << 13)
880 #endif
881 #ifndef HWCAP_FCMA
882 #define HWCAP_FCMA (1 << 14)
883 #endif
884 #ifndef HWCAP_LRCPC
885 #define HWCAP_LRCPC (1 << 15)
886 #endif
887 #ifndef HWCAP_DCPOP
888 #define HWCAP_DCPOP (1 << 16)
889 #endif
890 #ifndef HWCAP_SHA3
891 #define HWCAP_SHA3 (1 << 17)
892 #endif
893 #ifndef HWCAP_SM3
894 #define HWCAP_SM3 (1 << 18)
895 #endif
896 #ifndef HWCAP_SM4
897 #define HWCAP_SM4 (1 << 19)
898 #endif
899 #ifndef HWCAP_ASIMDDP
900 #define HWCAP_ASIMDDP (1 << 20)
901 #endif
902 #ifndef HWCAP_SHA512
903 #define HWCAP_SHA512 (1 << 21)
904 #endif
905 #ifndef HWCAP_SVE
906 #define HWCAP_SVE (1 << 22)
907 #endif
908 #ifndef HWCAP_ASIMDFHM
909 #define HWCAP_ASIMDFHM (1 << 23)
910 #endif
911 #ifndef HWCAP_DIT
912 #define HWCAP_DIT (1 << 24)
913 #endif
914 #ifndef HWCAP_ILRCPC
915 #define HWCAP_ILRCPC (1 << 26)
916 #endif
917 #ifndef HWCAP_FLAGM
918 #define HWCAP_FLAGM (1 << 27)
919 #endif
// Fallback definitions for AArch64 Linux AT_HWCAP bits: older <asm/hwcap.h>
// headers may predate the newer flags, so any missing ones are defined here
// with the values used by the Linux UAPI headers.
#ifndef HWCAP_SSBS
#define HWCAP_SSBS (1 << 28)
#endif
#ifndef HWCAP_SB
#define HWCAP_SB (1 << 29)
#endif

// AT_HWCAP2 auxv tag and its feature bits (the second hwcap word), again
// defined only when the system headers do not already provide them.
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1 << 0)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
#ifndef HWCAP2_SVEPMULL
#define HWCAP2_SVEPMULL (1 << 3)
#endif
#ifndef HWCAP2_SVEBITPERM
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
#ifndef HWCAP2_SVESHA3
#define HWCAP2_SVESHA3 (1 << 5)
#endif
#ifndef HWCAP2_SVESM4
#define HWCAP2_SVESM4 (1 << 6)
#endif
#ifndef HWCAP2_FLAGM2
#define HWCAP2_FLAGM2 (1 << 7)
#endif
#ifndef HWCAP2_FRINT
#define HWCAP2_FRINT (1 << 8)
#endif
#ifndef HWCAP2_SVEI8MM
#define HWCAP2_SVEI8MM (1 << 9)
#endif
#ifndef HWCAP2_SVEF32MM
#define HWCAP2_SVEF32MM (1 << 10)
#endif
#ifndef HWCAP2_SVEF64MM
#define HWCAP2_SVEF64MM (1 << 11)
#endif
#ifndef HWCAP2_SVEBF16
#define HWCAP2_SVEBF16 (1 << 12)
#endif
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#ifndef HWCAP2_BF16
#define HWCAP2_BF16 (1 << 14)
#endif
#ifndef HWCAP2_DGH
#define HWCAP2_DGH (1 << 15)
#endif
#ifndef HWCAP2_RNG
#define HWCAP2_RNG (1 << 16)
#endif
#ifndef HWCAP2_BTI
#define HWCAP2_BTI (1 << 17)
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1 << 18)
#endif
// Bits 19-20 are intentionally skipped here (not consumed by this file).
#ifndef HWCAP2_RPRES
#define HWCAP2_RPRES (1 << 21)
#endif
#ifndef HWCAP2_MTE3
#define HWCAP2_MTE3 (1 << 22)
#endif
#ifndef HWCAP2_SME
#define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME_I16I64
#define HWCAP2_SME_I16I64 (1 << 24)
#endif
#ifndef HWCAP2_SME_F64F64
#define HWCAP2_SME_F64F64 (1 << 25)
#endif
// Bits 31 and above use 1UL so the shift is done in a 64-bit type;
// assumes unsigned long is 64 bits (true for AArch64 Linux LP64).
#ifndef HWCAP2_WFXT
#define HWCAP2_WFXT (1UL << 31)
#endif
#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16 (1UL << 32)
#endif
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1UL << 33)
#endif
1011 
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector.
// Set once at startup by init_have_lse_atomics(); read by the out-of-line
// atomic helpers to choose between LSE and LL/SC code paths.
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
1016 
1017 #if defined(__has_include)
1018 #if __has_include(<sys/auxv.h>)
1019 #include <sys/auxv.h>
1020 #if __has_include(<asm/hwcap.h>)
1021 #include <asm/hwcap.h>
1022 
1023 #if defined(__ANDROID__)
1024 #include <string.h>
1025 #include <sys/system_properties.h>
1026 #elif defined(__Fuchsia__)
1027 #include <zircon/features.h>
1028 #include <zircon/syscalls.h>
1029 #endif
1030 
// Detect Exynos 9810 CPU (Android only).
//
// NOTE: this macro both declares a local `arch` buffer and expands to an
// unbraced `if (...)` header: the single statement that textually follows
// the macro becomes the if-body.  It must therefore be used at most once
// per scope and only as a statement prefix (e.g. `IF_EXYNOS9810 return;`).
#define IF_EXYNOS9810                                                          \
  char arch[PROP_VALUE_MAX];                                                   \
  if (__system_property_get("ro.arch", arch) > 0 &&                            \
      strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
1036 
// Early constructor (priority 90, see CONSTRUCTOR_ATTRIBUTE) that records
// whether the CPU supports LSE atomics, so the flag is valid before any
// user-code constructor can call an out-of-line atomic helper.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
#if defined(__FreeBSD__)
  unsigned long hwcap;
  // elf_aux_info returns 0 on success; on failure the flag stays false.
  int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
  __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
#elif defined(__Fuchsia__)
  // This ensures the vDSO is a direct link-time dependency of anything that
  // needs this initializer code.
#pragma comment(lib, "zircon")
  uint32_t features;
  zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
  __aarch64_have_lse_atomics =
      status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
#else
  // Linux and other getauxval-based systems.
  unsigned long hwcap = getauxval(AT_HWCAP);
  _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
#if defined(__ANDROID__)
  if (result) {
    // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
    // only the former support LSE atomics.  However, the kernel in the
    // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
    // reported the feature as being supported.
    //
    // The kernel appears to have been corrected to mark it unsupported as of
    // the Android 9.0 release on those devices, and this issue has not been
    // observed anywhere else. Thus, this workaround may be removed if
    // compiler-rt ever drops support for Android 8.0.
    IF_EXYNOS9810 result = false;
  }
#endif // defined(__ANDROID__)
  __aarch64_have_lse_atomics = result;
#endif // defined(__FreeBSD__)
}
1070 
1071 #if !defined(DISABLE_AARCH64_FMV)
// CPUFeatures must correspond to the same AArch64 features in
// AArch64TargetParser.h.
//
// Each enumerator is a bit index into __aarch64_cpu_features.features
// (set via `1ULL << F`), so the ordering is ABI between the compiler and
// this runtime: never reorder or insert in the middle — append new
// features immediately before FEAT_MAX only.
enum CPUFeatures {
  FEAT_RNG,
  FEAT_FLAGM,
  FEAT_FLAGM2,
  FEAT_FP16FML,
  FEAT_DOTPROD,
  FEAT_SM4,
  FEAT_RDM,
  FEAT_LSE,
  FEAT_FP,
  FEAT_SIMD,
  FEAT_CRC,
  FEAT_SHA1,
  FEAT_SHA2,
  FEAT_SHA3,
  FEAT_AES,
  FEAT_PMULL,
  FEAT_FP16,
  FEAT_DIT,
  FEAT_DPB,
  FEAT_DPB2,
  FEAT_JSCVT,
  FEAT_FCMA,
  FEAT_RCPC,
  FEAT_RCPC2,
  FEAT_FRINTTS,
  FEAT_DGH,
  FEAT_I8MM,
  FEAT_BF16,
  FEAT_EBF16,
  FEAT_RPRES,
  FEAT_SVE,
  FEAT_SVE_BF16,
  FEAT_SVE_EBF16,
  FEAT_SVE_I8MM,
  FEAT_SVE_F32MM,
  FEAT_SVE_F64MM,
  FEAT_SVE2,
  FEAT_SVE_AES,
  FEAT_SVE_PMULL128,
  FEAT_SVE_BITPERM,
  FEAT_SVE_SHA3,
  FEAT_SVE_SM4,
  FEAT_SME,
  FEAT_MEMTAG,
  FEAT_MEMTAG2,
  FEAT_MEMTAG3,
  FEAT_SB,
  FEAT_PREDRES,
  FEAT_SSBS,
  FEAT_SSBS2,
  FEAT_BTI,
  FEAT_LS64,
  FEAT_LS64_V,
  FEAT_LS64_ACCDATA,
  FEAT_WFXT,
  FEAT_SME_F64,
  FEAT_SME_I64,
  FEAT_SME2,
  // Sentinel; also doubles as the "initialized" marker bit set by
  // init_cpu_features().
  FEAT_MAX
};
1135 
// Architecture features used
// in Function Multi Versioning.
struct {
  // Bitmask indexed by enum CPUFeatures: bit F set => feature F available.
  unsigned long long features;
  // As features grows new fields could be added
} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
1142 
init_cpu_features_resolver(unsigned long hwcap,unsigned long hwcap2)1143 void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) {
1144 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
1145 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
1146 #define extractBits(val, start, number)                                        \
1147   (val & ((1ULL << number) - 1ULL) << start) >> start
1148   if (hwcap & HWCAP_CRC32)
1149     setCPUFeature(FEAT_CRC);
1150   if (hwcap & HWCAP_PMULL)
1151     setCPUFeature(FEAT_PMULL);
1152   if (hwcap & HWCAP_FLAGM)
1153     setCPUFeature(FEAT_FLAGM);
1154   if (hwcap2 & HWCAP2_FLAGM2) {
1155     setCPUFeature(FEAT_FLAGM);
1156     setCPUFeature(FEAT_FLAGM2);
1157   }
1158   if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
1159     setCPUFeature(FEAT_SM4);
1160   if (hwcap & HWCAP_ASIMDDP)
1161     setCPUFeature(FEAT_DOTPROD);
1162   if (hwcap & HWCAP_ASIMDFHM)
1163     setCPUFeature(FEAT_FP16FML);
1164   if (hwcap & HWCAP_FPHP) {
1165     setCPUFeature(FEAT_FP16);
1166     setCPUFeature(FEAT_FP);
1167   }
1168   if (hwcap & HWCAP_DIT)
1169     setCPUFeature(FEAT_DIT);
1170   if (hwcap & HWCAP_ASIMDRDM)
1171     setCPUFeature(FEAT_RDM);
1172   if (hwcap & HWCAP_ILRCPC)
1173     setCPUFeature(FEAT_RCPC2);
1174   if (hwcap & HWCAP_AES)
1175     setCPUFeature(FEAT_AES);
1176   if (hwcap & HWCAP_SHA1)
1177     setCPUFeature(FEAT_SHA1);
1178   if (hwcap & HWCAP_SHA2)
1179     setCPUFeature(FEAT_SHA2);
1180   if (hwcap & HWCAP_JSCVT)
1181     setCPUFeature(FEAT_JSCVT);
1182   if (hwcap & HWCAP_FCMA)
1183     setCPUFeature(FEAT_FCMA);
1184   if (hwcap & HWCAP_SB)
1185     setCPUFeature(FEAT_SB);
1186   if (hwcap & HWCAP_SSBS)
1187     setCPUFeature(FEAT_SSBS2);
1188   if (hwcap2 & HWCAP2_MTE) {
1189     setCPUFeature(FEAT_MEMTAG);
1190     setCPUFeature(FEAT_MEMTAG2);
1191   }
1192   if (hwcap2 & HWCAP2_MTE3) {
1193     setCPUFeature(FEAT_MEMTAG);
1194     setCPUFeature(FEAT_MEMTAG2);
1195     setCPUFeature(FEAT_MEMTAG3);
1196   }
1197   if (hwcap2 & HWCAP2_SVEAES)
1198     setCPUFeature(FEAT_SVE_AES);
1199   if (hwcap2 & HWCAP2_SVEPMULL) {
1200     setCPUFeature(FEAT_SVE_AES);
1201     setCPUFeature(FEAT_SVE_PMULL128);
1202   }
1203   if (hwcap2 & HWCAP2_SVEBITPERM)
1204     setCPUFeature(FEAT_SVE_BITPERM);
1205   if (hwcap2 & HWCAP2_SVESHA3)
1206     setCPUFeature(FEAT_SVE_SHA3);
1207   if (hwcap2 & HWCAP2_SVESM4)
1208     setCPUFeature(FEAT_SVE_SM4);
1209   if (hwcap2 & HWCAP2_DCPODP)
1210     setCPUFeature(FEAT_DPB2);
1211   if (hwcap & HWCAP_ATOMICS)
1212     setCPUFeature(FEAT_LSE);
1213   if (hwcap2 & HWCAP2_RNG)
1214     setCPUFeature(FEAT_RNG);
1215   if (hwcap2 & HWCAP2_I8MM)
1216     setCPUFeature(FEAT_I8MM);
1217   if (hwcap2 & HWCAP2_EBF16)
1218     setCPUFeature(FEAT_EBF16);
1219   if (hwcap2 & HWCAP2_SVE_EBF16)
1220     setCPUFeature(FEAT_SVE_EBF16);
1221   if (hwcap2 & HWCAP2_DGH)
1222     setCPUFeature(FEAT_DGH);
1223   if (hwcap2 & HWCAP2_FRINT)
1224     setCPUFeature(FEAT_FRINTTS);
1225   if (hwcap2 & HWCAP2_SVEI8MM)
1226     setCPUFeature(FEAT_SVE_I8MM);
1227   if (hwcap2 & HWCAP2_SVEF32MM)
1228     setCPUFeature(FEAT_SVE_F32MM);
1229   if (hwcap2 & HWCAP2_SVEF64MM)
1230     setCPUFeature(FEAT_SVE_F64MM);
1231   if (hwcap2 & HWCAP2_BTI)
1232     setCPUFeature(FEAT_BTI);
1233   if (hwcap2 & HWCAP2_RPRES)
1234     setCPUFeature(FEAT_RPRES);
1235   if (hwcap2 & HWCAP2_WFXT)
1236     setCPUFeature(FEAT_WFXT);
1237   if (hwcap2 & HWCAP2_SME)
1238     setCPUFeature(FEAT_SME);
1239   if (hwcap2 & HWCAP2_SME_I16I64)
1240     setCPUFeature(FEAT_SME_I64);
1241   if (hwcap2 & HWCAP2_SME_F64F64)
1242     setCPUFeature(FEAT_SME_F64);
1243   if (hwcap & HWCAP_CPUID) {
1244     unsigned long ftr;
1245     getCPUFeature(ID_AA64PFR1_EL1, ftr);
1246     // ID_AA64PFR1_EL1.MTE >= 0b0001
1247     if (extractBits(ftr, 8, 4) >= 0x1)
1248       setCPUFeature(FEAT_MEMTAG);
1249     // ID_AA64PFR1_EL1.SSBS == 0b0001
1250     if (extractBits(ftr, 4, 4) == 0x1)
1251       setCPUFeature(FEAT_SSBS);
1252     // ID_AA64PFR1_EL1.SME == 0b0010
1253     if (extractBits(ftr, 24, 4) == 0x2)
1254       setCPUFeature(FEAT_SME2);
1255     getCPUFeature(ID_AA64PFR0_EL1, ftr);
1256     // ID_AA64PFR0_EL1.FP != 0b1111
1257     if (extractBits(ftr, 16, 4) != 0xF) {
1258       setCPUFeature(FEAT_FP);
1259       // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
1260       setCPUFeature(FEAT_SIMD);
1261     }
1262     // ID_AA64PFR0_EL1.SVE != 0b0000
1263     if (extractBits(ftr, 32, 4) != 0x0) {
1264       // get ID_AA64ZFR0_EL1, that name supported
1265       // if sve enabled only
1266       getCPUFeature(S3_0_C0_C4_4, ftr);
1267       // ID_AA64ZFR0_EL1.SVEver == 0b0000
1268       if (extractBits(ftr, 0, 4) == 0x0)
1269         setCPUFeature(FEAT_SVE);
1270       // ID_AA64ZFR0_EL1.SVEver == 0b0001
1271       if (extractBits(ftr, 0, 4) == 0x1)
1272         setCPUFeature(FEAT_SVE2);
1273       // ID_AA64ZFR0_EL1.BF16 != 0b0000
1274       if (extractBits(ftr, 20, 4) != 0x0)
1275         setCPUFeature(FEAT_SVE_BF16);
1276     }
1277     getCPUFeature(ID_AA64ISAR0_EL1, ftr);
1278     // ID_AA64ISAR0_EL1.SHA3 != 0b0000
1279     if (extractBits(ftr, 32, 4) != 0x0)
1280       setCPUFeature(FEAT_SHA3);
1281     getCPUFeature(ID_AA64ISAR1_EL1, ftr);
1282     // ID_AA64ISAR1_EL1.DPB >= 0b0001
1283     if (extractBits(ftr, 0, 4) >= 0x1)
1284       setCPUFeature(FEAT_DPB);
1285     // ID_AA64ISAR1_EL1.LRCPC != 0b0000
1286     if (extractBits(ftr, 20, 4) != 0x0)
1287       setCPUFeature(FEAT_RCPC);
1288     // ID_AA64ISAR1_EL1.SPECRES == 0b0001
1289     if (extractBits(ftr, 40, 4) == 0x2)
1290       setCPUFeature(FEAT_PREDRES);
1291     // ID_AA64ISAR1_EL1.BF16 != 0b0000
1292     if (extractBits(ftr, 44, 4) != 0x0)
1293       setCPUFeature(FEAT_BF16);
1294     // ID_AA64ISAR1_EL1.LS64 >= 0b0001
1295     if (extractBits(ftr, 60, 4) >= 0x1)
1296       setCPUFeature(FEAT_LS64);
1297     // ID_AA64ISAR1_EL1.LS64 >= 0b0010
1298     if (extractBits(ftr, 60, 4) >= 0x2)
1299       setCPUFeature(FEAT_LS64_V);
1300     // ID_AA64ISAR1_EL1.LS64 >= 0b0011
1301     if (extractBits(ftr, 60, 4) >= 0x3)
1302       setCPUFeature(FEAT_LS64_ACCDATA);
1303   } else {
1304     // Set some features in case of no CPUID support
1305     if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
1306       setCPUFeature(FEAT_FP);
1307       // FP and AdvSIMD fields have the same value
1308       setCPUFeature(FEAT_SIMD);
1309     }
1310     if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
1311       setCPUFeature(FEAT_DPB);
1312     if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
1313       setCPUFeature(FEAT_RCPC);
1314     if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
1315       setCPUFeature(FEAT_BF16);
1316     if (hwcap2 & HWCAP2_SVEBF16)
1317       setCPUFeature(FEAT_SVE_BF16);
1318     if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
1319       setCPUFeature(FEAT_SVE2);
1320     if (hwcap & HWCAP_SHA3)
1321       setCPUFeature(FEAT_SHA3);
1322   }
1323 }
1324 
// Constructor (priority 90) that populates __aarch64_cpu_features from the
// kernel-provided hwcap words.  Safe to call more than once: the first call
// sets the FEAT_MAX sentinel bit, and later calls return early.
void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
  unsigned long hwcap;
  unsigned long hwcap2;
  // CPU features already initialized.
  if (__aarch64_cpu_features.features)
    return;
  // Mark as initialized (FEAT_MAX is a sentinel, not a real feature).
  setCPUFeature(FEAT_MAX);
#if defined(__FreeBSD__)
  int res = 0;
  res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
  res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
  // On failure, leave only the sentinel set (all features reported absent);
  // the early-return above means this will not be retried.
  if (res)
    return;
#else
#if defined(__ANDROID__)
  // Don't set any CPU features,
  // detection could be wrong on Exynos 9810.
  IF_EXYNOS9810 return;
#endif // defined(__ANDROID__)
  hwcap = getauxval(AT_HWCAP);
  hwcap2 = getauxval(AT_HWCAP2);
#endif // defined(__FreeBSD__)
  init_cpu_features_resolver(hwcap, hwcap2);
#undef extractBits
#undef getCPUFeature
#undef setCPUFeature
#undef IF_EXYNOS9810
}
1353 #endif // !defined(DISABLE_AARCH64_FMV)
#endif // __has_include(<asm/hwcap.h>)
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
1357 #endif // defined(__aarch64__)
1358