//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file is based on LLVM's lib/Support/Host.cpp.
//  It implements the operating system Host concept and builtin
//  __cpu_model for the compiler_rt library for x86 and
//  __aarch64_have_lse_atomics for AArch64.
//
//===----------------------------------------------------------------------===//

#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
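// Note: constructor priority 101 is the lowest value available to user code
// (priorities 0-100 are reserved for the implementation), so this constructor
// is intended to run ahead of ordinary, unprioritized constructors.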

#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
     defined(_M_X64)) &&                                                       \
    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))

#include <assert.h>

#define bool int
#define true 1
#define false 0

#ifdef _MSC_VER
#include <intrin.h>
#endif

enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};

enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};

enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  CPU_TYPE_MAX
};

enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  CPU_SUBTYPE_MAX
};

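// Feature enumerators 0-31 below are reported through
// __cpu_model.__cpu_features[0]; the remaining ones are reported through the
// separate __cpu_features2 variable (see __cpu_indicator_init).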
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX
};

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
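  // Toggle the ID bit (bit 21) in EFLAGS: if the change sticks, the CPU
  // understands the CPUID instruction.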
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}

// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

// Read extended control register 0 (XCR0). Used to detect features such as
// AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
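  // 0x0f, 0x01, 0xd0 is the encoding of xgetbv; ECX = 0 selects XCR0.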
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

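// Decode the family and model fields from EAX of CPUID leaf 1. For families
// 6 and 0xF the extended model (and, for family 0xF, extended family) bits
// are folded in; for example, EAX = 0x000306A9 decodes to Family 6,
// Model 0x3A (Ivy Bridge).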
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}

static const char *
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
#define testFeature(F)                                                         \
  (Features[F / 32] & (1 << (F % 32))) != 0
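  // testFeature reads the same packed bit set that setFeature fills in
  // getAvailableFeatures: word F / 32, bit F % 32.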

  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
  const char *CPU = 0;

  switch (Family) {
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn, Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e:              // Nehalem EX
      CPU = "nehalem";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e:              // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e:              // Skylake mobile
    case 0x5e:              // Skylake desktop
    case 0x8e:              // Kaby Lake mobile
    case 0x9e:              // Kaby Lake desktop
    case 0xa5:              // Comet Lake-H/S
    case 0xa6:              // Comet Lake-U
      CPU = "skylake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    case 0x55:
      *Type = INTEL_COREI7;
      if (testFeature(FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
      CPU = "alderlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ALDERLAKE;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = INTEL_GOLDMONT;
      break; // "goldmont"
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = INTEL_TREMONT;
      break;

    case 0x57:
      CPU = "knl";
      *Type = INTEL_KNL;
      break;

    case 0x85:
      CPU = "knm";
      *Type = INTEL_KNM;
      break;

    default: // Unknown family 6 CPU.
      break;
    }
    break;
  default:
    break; // Unknown.
  }

  return CPU;
}

static const char *
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                              const unsigned *Features,
                              unsigned *Type, unsigned *Subtype) {
  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
  const char *CPU = 0;

  switch (Family) {
  case 16:
    CPU = "amdfam10";
    *Type = AMDFAM10H;
    switch (Model) {
    case 2:
      *Subtype = AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = AMD_BTVER1;
    break;
  case 21:
    CPU = "bdver1";
    *Type = AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = AMDFAM15H_BDVER3;
      break; // 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      CPU = "bdver2";
      *Subtype = AMDFAM15H_BDVER2;
      break; // 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM15H_BDVER1;
      break; // 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    CPU = "btver2";
    *Type = AMD_BTVER2;
    break;
  case 23:
    CPU = "znver1";
    *Type = AMDFAM17H;
    if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
      CPU = "znver2";
      *Subtype = AMDFAM17H_ZNVER2;
      break; // 30h-3fh, 71h: Zen2
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM17H_ZNVER1;
      break; // 00h-0Fh: Zen1
    }
    break;
  case 25:
    CPU = "znver3";
    *Type = AMDFAM19H;
    if (Model <= 0x0f) {
      *Subtype = AMDFAM19H_ZNVER3;
      break; // 00h-0Fh: Zen3
    }
    break;
  default:
    break; // Unknown AMD CPU.
  }

  return CPU;
}

static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

#define setFeature(F)                                                          \
  Features[F / 32] |= 1U << (F % 32)
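  // ECX and EDX hold the feature bits from CPUID leaf 1, passed in by
  // __cpu_indicator_init; leaf 7 and the extended leaves are queried below.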

  if ((EDX >> 15) & 1)
    setFeature(FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(FEATURE_SSE2);

  if ((ECX >> 0) & 1)
    setFeature(FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(FEATURE_SSE4_1);
  if ((ECX >> 20) & 1)
    setFeature(FEATURE_SSE4_2);
  if ((ECX >> 23) & 1)
    setFeature(FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(FEATURE_AES);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
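  // Leaf 1 ECX bit 27 is OSXSAVE and bit 28 is AVX; XCR0 bits 1 (XMM state)
  // and 2 (YMM state) must both be enabled by the OS.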
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
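  // XCR0 bits 5-7 cover the AVX-512 opmask, ZMM_Hi256 and Hi16_ZMM state.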
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(FEATURE_AVX);

  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VL);

  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VPOPCNTDQ);

  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VP2INTERSECT);

  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BF16);

  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(FEATURE_FMA4);
#undef setFeature
}

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned int __cpu_features2 = 0;

// A constructor function that sets __cpu_model and __cpu_features2 to
// the right values.  This needs to run only once.  This constructor is
// given the highest priority and it should run before constructors without
// the priority set.  However, it still runs after ifunc initializers and
// needs to be called explicitly there.
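//
// Code generated for __builtin_cpu_is() and __builtin_cpu_supports() reads
// __cpu_model and __cpu_features2 directly, so it relies on this constructor
// (or an explicit call from an ifunc resolver) having run first.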

int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5;
  unsigned Vendor;
  unsigned Model, Family;
  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};

  // This function needs to run just once.
  if (__cpu_model.__cpu_vendor)
    return 0;

  if (!isCpuIdSupported() ||
      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;
    return -1;
  }

  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);

  // Find available features.
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);

  assert((sizeof(Features) / sizeof(Features[0])) == 2);
  __cpu_model.__cpu_features[0] = Features[0];
  __cpu_features2 = Features[1];

  if (Vendor == SIG_INTEL) {
    // Get CPU type.
    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    // Get CPU type.
    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
  } else
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);

  return 0;
}
#elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
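// The out-of-line atomics helpers emitted for -moutline-atomics test this
// flag at run time to choose between LSE instructions and LL/SC sequences.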
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
  unsigned long hwcap = getauxval(AT_HWCAP);
  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
#endif // defined(__aarch64__)