1 //===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library, for x86 only.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
16      defined(_M_X64)) &&                                                       \
17     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
18 
19 #include <assert.h>
20 
// Minimal boolean shim used throughout this file: `bool` is plain `int`, and
// `true` / `false` are the integer constants 1 and 0.
#define bool int
#define true 1
#define false 0
24 
25 #ifdef _MSC_VER
26 #include <intrin.h>
27 #endif
28 
29 #ifndef __has_attribute
30 #define __has_attribute(attr) 0
31 #endif
32 
// Vendor signatures, compared against the EBX value that CPUID leaf 0 returns.
// Each constant is four ASCII characters packed with the first character in
// the least significant byte.
enum VendorSignatures {
  SIG_AMD = 0x68747541,  // "Auth"
  SIG_INTEL = 0x756e6547 // "Genu"
};
37 
// Vendor identifiers stored in __cpu_model.__cpu_vendor.  Numbering starts at
// 1, which leaves 0 free to mean "not initialised yet" in the run-once check
// performed by __cpu_indicator_init.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last vendor; upper bound for range checks.
};
44 
// CPU type identifiers stored in __cpu_model.__cpu_type.  Values are spelled
// out explicitly; 0 is never used, so a zero __cpu_type means "no type was
// recognised".
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  CPU_TYPE_MAX = 15 // One past the last type; upper bound for range checks.
};
62 
// CPU subtype identifiers stored in __cpu_model.__cpu_subtype.  Intel and AMD
// entries are interleaved; the explicit numbers below keep every enumerator at
// the value its position in the list gives it.  0 is never used, so a zero
// __cpu_subtype means "no subtype was recognised".
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  CPU_SUBTYPE_MAX = 22 // One past the last subtype; bound for range checks.
};
87 
// Feature indices.  Each enumerator is a bit number: values 0..31 select a bit
// in the first feature word (__cpu_model.__cpu_features[0]) and values 32..63
// select bit (value - 32) in the second word (__cpu_features2).
enum ProcessorFeatures {
  // First feature word.
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,

  // Second feature word.
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36
};
127 
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// It was motivated by bug reports of OpenSSL crashing on CPUs without CPUID
// support.  Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when CPUID may be executed.  Only 32-bit x86 builds with a
// GNU-compatible compiler perform an actual probe; every other configuration
// reports true unconditionally.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  // Read EFLAGS, flip bit 21 (0x00200000), write it back and read it again.
  // If the re-read value equals the saved copy the bit could not be changed,
  // and the result is 0; otherwise the result is 1.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  if (!CpuIdSupported)
    return false;
#endif
  return true;
}
159 
160 // This code is copied from lib/Support/Host.cpp.
161 // Changes to either file should be mirrored in the other.
162 
/// getX86CpuIDAndInfo - Run CPUID for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX through the four output pointers.  Returns false when
/// the instruction was executed, and true when this build has no way to run
/// CPUID (the outputs are then left unwritten).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc is not told that cpuid overwrites rbx, so the caller's rbx is parked
  // in rsi across the instruction.  The closing xchg puts it back and leaves
  // cpuid's EBX result in rsi, which is what the "=S" output reads.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__GNUC__) || defined(__clang__)
  // GNU-compatible compiler, but neither __x86_64__ nor __i386__ is defined.
  return true;
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
200 
/// getX86CpuIDAndInfoEx - Run CPUID for leaf \p value with \p subleaf loaded
/// into ECX, and store the resulting EAX, EBX, ECX and EDX through the four
/// output pointers.  Returns false when the instruction was executed, and true
/// when this build has no way to run CPUID (the outputs are then left
/// unwritten).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc is not told that cpuid overwrites rbx, so the caller's rbx is parked
  // in rsi across the instruction.  The closing xchg puts it back and leaves
  // cpuid's EBX result in rsi, which is what the "=S" output reads.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__GNUC__) || defined(__clang__)
  // GNU-compatible compiler, but neither __x86_64__ nor __i386__ is defined.
  return true;
#elif defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
239 
// Read control register 0 (XCR0). Used to detect features such as AVX.
// The low 32 bits are stored in *rEAX and the high 32 bits in *rEDX.
// Returns false when the register was read, and true when this build has no
// way to read it (the outputs are then left unwritten).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // The "c"(0) input loads ECX with 0 before the instruction runs.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // The intrinsic returns all 64 bits at once; split them into the two halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
257 
// Decode the family and model numbers from the EAX value of CPUID leaf 1.
//
// The base family lives in bits 8-11 and the base model in bits 4-7.  When the
// base family is 0xF the extended family (bits 20-27) is added to it, and when
// the base family is 6 or 0xF the extended model (bits 16-19) supplies the
// upper four bits of the model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
270 
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,unsigned Brand_id,unsigned Features,unsigned Features2,unsigned * Type,unsigned * Subtype)271 static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
272                                             unsigned Brand_id,
273                                             unsigned Features,
274                                             unsigned Features2, unsigned *Type,
275                                             unsigned *Subtype) {
276   if (Brand_id != 0)
277     return;
278   switch (Family) {
279   case 6:
280     switch (Model) {
281     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
282                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
283                // mobile processor, Intel Core 2 Extreme processor, Intel
284                // Pentium Dual-Core processor, Intel Xeon processor, model
285                // 0Fh. All processors are manufactured using the 65 nm process.
286     case 0x16: // Intel Celeron processor model 16h. All processors are
287                // manufactured using the 65 nm process
288     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
289                // 17h. All processors are manufactured using the 45 nm process.
290                //
291                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
292     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
293                // the 45 nm process.
294       *Type = INTEL_CORE2; // "penryn"
295       break;
296     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
297                // processors are manufactured using the 45 nm process.
298     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
299                // As found in a Summer 2010 model iMac.
300     case 0x1f:
301     case 0x2e:              // Nehalem EX
302       *Type = INTEL_COREI7; // "nehalem"
303       *Subtype = INTEL_COREI7_NEHALEM;
304       break;
305     case 0x25: // Intel Core i7, laptop version.
306     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
307                // processors are manufactured using the 32 nm process.
308     case 0x2f: // Westmere EX
309       *Type = INTEL_COREI7; // "westmere"
310       *Subtype = INTEL_COREI7_WESTMERE;
311       break;
312     case 0x2a: // Intel Core i7 processor. All processors are manufactured
313                // using the 32 nm process.
314     case 0x2d:
315       *Type = INTEL_COREI7; //"sandybridge"
316       *Subtype = INTEL_COREI7_SANDYBRIDGE;
317       break;
318     case 0x3a:
319     case 0x3e:              // Ivy Bridge EP
320       *Type = INTEL_COREI7; // "ivybridge"
321       *Subtype = INTEL_COREI7_IVYBRIDGE;
322       break;
323 
324     // Haswell:
325     case 0x3c:
326     case 0x3f:
327     case 0x45:
328     case 0x46:
329       *Type = INTEL_COREI7; // "haswell"
330       *Subtype = INTEL_COREI7_HASWELL;
331       break;
332 
333     // Broadwell:
334     case 0x3d:
335     case 0x47:
336     case 0x4f:
337     case 0x56:
338       *Type = INTEL_COREI7; // "broadwell"
339       *Subtype = INTEL_COREI7_BROADWELL;
340       break;
341 
342     // Skylake:
343     case 0x4e:              // Skylake mobile
344     case 0x5e:              // Skylake desktop
345     case 0x8e:              // Kaby Lake mobile
346     case 0x9e:              // Kaby Lake desktop
347       *Type = INTEL_COREI7; // "skylake"
348       *Subtype = INTEL_COREI7_SKYLAKE;
349       break;
350 
351     // Skylake Xeon:
352     case 0x55:
353       *Type = INTEL_COREI7;
354       if (Features2 & (1 << (FEATURE_AVX512VNNI - 32)))
355         *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake"
356       else
357         *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
358       break;
359 
360     // Cannonlake:
361     case 0x66:
362       *Type = INTEL_COREI7;
363       *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
364       break;
365 
366     // Icelake:
367     case 0x7d:
368     case 0x7e:
369       *Type = INTEL_COREI7;
370       *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
371       break;
372 
373     // Icelake Xeon:
374     case 0x6a:
375     case 0x6c:
376       *Type = INTEL_COREI7;
377       *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
378       break;
379 
380     case 0x1c: // Most 45 nm Intel Atom processors
381     case 0x26: // 45 nm Atom Lincroft
382     case 0x27: // 32 nm Atom Medfield
383     case 0x35: // 32 nm Atom Midview
384     case 0x36: // 32 nm Atom Midview
385       *Type = INTEL_BONNELL;
386       break; // "bonnell"
387 
388     // Atom Silvermont codes from the Intel software optimization guide.
389     case 0x37:
390     case 0x4a:
391     case 0x4d:
392     case 0x5a:
393     case 0x5d:
394     case 0x4c: // really airmont
395       *Type = INTEL_SILVERMONT;
396       break; // "silvermont"
397     // Goldmont:
398     case 0x5c: // Apollo Lake
399     case 0x5f: // Denverton
400       *Type = INTEL_GOLDMONT;
401       break; // "goldmont"
402     case 0x7a:
403       *Type = INTEL_GOLDMONT_PLUS;
404       break;
405     case 0x86:
406       *Type = INTEL_TREMONT;
407       break;
408 
409     case 0x57:
410       *Type = INTEL_KNL; // knl
411       break;
412 
413     case 0x85:
414       *Type = INTEL_KNM; // knm
415       break;
416 
417     default: // Unknown family 6 CPU.
418       break;
419     }
420     break;
421   default:
422     break; // Unknown.
423   }
424 }
425 
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,unsigned Features,unsigned Features2,unsigned * Type,unsigned * Subtype)426 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
427                                           unsigned Features, unsigned Features2,
428                                           unsigned *Type, unsigned *Subtype) {
429   // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
430   // appears to be no way to generate the wide variety of AMD-specific targets
431   // from the information returned from CPUID.
432   switch (Family) {
433   case 16:
434     *Type = AMDFAM10H; // "amdfam10"
435     switch (Model) {
436     case 2:
437       *Subtype = AMDFAM10H_BARCELONA;
438       break;
439     case 4:
440       *Subtype = AMDFAM10H_SHANGHAI;
441       break;
442     case 8:
443       *Subtype = AMDFAM10H_ISTANBUL;
444       break;
445     }
446     break;
447   case 20:
448     *Type = AMD_BTVER1;
449     break; // "btver1";
450   case 21:
451     *Type = AMDFAM15H;
452     if (Model >= 0x60 && Model <= 0x7f) {
453       *Subtype = AMDFAM15H_BDVER4;
454       break; // "bdver4"; 60h-7Fh: Excavator
455     }
456     if (Model >= 0x30 && Model <= 0x3f) {
457       *Subtype = AMDFAM15H_BDVER3;
458       break; // "bdver3"; 30h-3Fh: Steamroller
459     }
460     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
461       *Subtype = AMDFAM15H_BDVER2;
462       break; // "bdver2"; 02h, 10h-1Fh: Piledriver
463     }
464     if (Model <= 0x0f) {
465       *Subtype = AMDFAM15H_BDVER1;
466       break; // "bdver1"; 00h-0Fh: Bulldozer
467     }
468     break;
469   case 22:
470     *Type = AMD_BTVER2;
471     break; // "btver2"
472   case 23:
473     *Type = AMDFAM17H;
474     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
475       *Subtype = AMDFAM17H_ZNVER2;
476       break; // "znver2"; 30h-3fh, 71h: Zen2
477     }
478     if (Model <= 0x0f) {
479       *Subtype = AMDFAM17H_ZNVER1;
480       break; // "znver1"; 00h-0Fh: Zen1
481     }
482     break;
483   default:
484     break; // "generic"
485   }
486 }
487 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * FeaturesOut,unsigned * Features2Out)488 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
489                                  unsigned *FeaturesOut,
490                                  unsigned *Features2Out) {
491   unsigned Features = 0;
492   unsigned Features2 = 0;
493   unsigned EAX, EBX;
494 
495 #define setFeature(F)                                                          \
496   do {                                                                         \
497     if (F < 32)                                                                \
498       Features |= 1U << (F & 0x1f);                                            \
499     else if (F < 64)                                                           \
500       Features2 |= 1U << ((F - 32) & 0x1f);                                    \
501   } while (0)
502 
503   if ((EDX >> 15) & 1)
504     setFeature(FEATURE_CMOV);
505   if ((EDX >> 23) & 1)
506     setFeature(FEATURE_MMX);
507   if ((EDX >> 25) & 1)
508     setFeature(FEATURE_SSE);
509   if ((EDX >> 26) & 1)
510     setFeature(FEATURE_SSE2);
511 
512   if ((ECX >> 0) & 1)
513     setFeature(FEATURE_SSE3);
514   if ((ECX >> 1) & 1)
515     setFeature(FEATURE_PCLMUL);
516   if ((ECX >> 9) & 1)
517     setFeature(FEATURE_SSSE3);
518   if ((ECX >> 12) & 1)
519     setFeature(FEATURE_FMA);
520   if ((ECX >> 19) & 1)
521     setFeature(FEATURE_SSE4_1);
522   if ((ECX >> 20) & 1)
523     setFeature(FEATURE_SSE4_2);
524   if ((ECX >> 23) & 1)
525     setFeature(FEATURE_POPCNT);
526   if ((ECX >> 25) & 1)
527     setFeature(FEATURE_AES);
528 
529   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
530   // indicates that the AVX registers will be saved and restored on context
531   // switch, then we have full AVX support.
532   const unsigned AVXBits = (1 << 27) | (1 << 28);
533   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
534                 ((EAX & 0x6) == 0x6);
535 #if defined(__APPLE__)
536   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
537   // save the AVX512 context if we use AVX512 instructions, even the bit is not
538   // set right now.
539   bool HasAVX512Save = true;
540 #else
541   // AVX512 requires additional context to be saved by the OS.
542   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
543 #endif
544 
545   if (HasAVX)
546     setFeature(FEATURE_AVX);
547 
548   bool HasLeaf7 =
549       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
550 
551   if (HasLeaf7 && ((EBX >> 3) & 1))
552     setFeature(FEATURE_BMI);
553   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
554     setFeature(FEATURE_AVX2);
555   if (HasLeaf7 && ((EBX >> 8) & 1))
556     setFeature(FEATURE_BMI2);
557   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
558     setFeature(FEATURE_AVX512F);
559   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
560     setFeature(FEATURE_AVX512DQ);
561   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
562     setFeature(FEATURE_AVX512IFMA);
563   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
564     setFeature(FEATURE_AVX512PF);
565   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
566     setFeature(FEATURE_AVX512ER);
567   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
568     setFeature(FEATURE_AVX512CD);
569   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
570     setFeature(FEATURE_AVX512BW);
571   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
572     setFeature(FEATURE_AVX512VL);
573 
574   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
575     setFeature(FEATURE_AVX512VBMI);
576   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
577     setFeature(FEATURE_AVX512VBMI2);
578   if (HasLeaf7 && ((ECX >> 8) & 1))
579     setFeature(FEATURE_GFNI);
580   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
581     setFeature(FEATURE_VPCLMULQDQ);
582   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
583     setFeature(FEATURE_AVX512VNNI);
584   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
585     setFeature(FEATURE_AVX512BITALG);
586   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
587     setFeature(FEATURE_AVX512VPOPCNTDQ);
588 
589   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
590     setFeature(FEATURE_AVX5124VNNIW);
591   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
592     setFeature(FEATURE_AVX5124FMAPS);
593 
594   bool HasLeaf7Subleaf1 =
595       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
596   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
597     setFeature(FEATURE_AVX512BF16);
598 
599   unsigned MaxExtLevel;
600   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
601 
602   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
603                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
604   if (HasExtLeaf1 && ((ECX >> 6) & 1))
605     setFeature(FEATURE_SSE4_A);
606   if (HasExtLeaf1 && ((ECX >> 11) & 1))
607     setFeature(FEATURE_XOP);
608   if (HasExtLeaf1 && ((ECX >> 16) & 1))
609     setFeature(FEATURE_FMA4);
610 
611   *FeaturesOut = Features;
612   *Features2Out = Features2;
613 #undef setFeature
614 }
615 
// CONSTRUCTOR_ATTRIBUTE is attached to __cpu_indicator_init so that it is run
// as a constructor where the compiler supports that.  When HAVE_INIT_PRIORITY
// is defined an explicit priority of 101 is requested; the priority has to be
// written as a parenthesised argument of the attribute.
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
625 
// Forward declaration of __cpu_indicator_init carrying CONSTRUCTOR_ATTRIBUTE.
// On every target except Windows the symbol is given hidden visibility.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
630 
// Global CPU description, zero-initialised here and filled in by
// __cpu_indicator_init.  On every target except Windows the symbol is given
// hidden visibility.
// NOTE(review): presumably the field layout has to match what compiler-emitted
// references to __cpu_model expect -- confirm before changing it.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  // A ProcessorVendors value; 0 until __cpu_indicator_init has assigned one.
  unsigned int __cpu_vendor;
  // A ProcessorTypes value, or 0 when no type was recognised.
  unsigned int __cpu_type;
  // A ProcessorSubtypes value, or 0 when no subtype was recognised.
  unsigned int __cpu_subtype;
  // Bitmask of the detected ProcessorFeatures numbered 0..31.
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};
640 
// Second feature word, written by __cpu_indicator_init: bit (F - 32) is set
// for each detected ProcessorFeatures value F in the range 32..63.  On every
// target except Windows the symbol is given hidden visibility.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned int __cpu_features2;
645 
646 // A constructor function that is sets __cpu_model and __cpu_features2 with
647 // the right values.  This needs to run only once.  This constructor is
648 // given the highest priority and it should run before constructors without
649 // the priority set.  However, it still runs after ifunc initializers and
650 // needs to be called explicitly there.
651 
__cpu_indicator_init(void)652 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
653   unsigned EAX, EBX, ECX, EDX;
654   unsigned MaxLeaf = 5;
655   unsigned Vendor;
656   unsigned Model, Family, Brand_id;
657   unsigned Features = 0;
658   unsigned Features2 = 0;
659 
660   // This function needs to run just once.
661   if (__cpu_model.__cpu_vendor)
662     return 0;
663 
664   if (!isCpuIdSupported())
665     return -1;
666 
667   // Assume cpuid insn present. Run in level 0 to get vendor id.
668   if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
669     __cpu_model.__cpu_vendor = VENDOR_OTHER;
670     return -1;
671   }
672   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
673   detectX86FamilyModel(EAX, &Family, &Model);
674   Brand_id = EBX & 0xff;
675 
676   // Find available features.
677   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
678   __cpu_model.__cpu_features[0] = Features;
679   __cpu_features2 = Features2;
680 
681   if (Vendor == SIG_INTEL) {
682     // Get CPU type.
683     getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
684                                     Features2, &(__cpu_model.__cpu_type),
685                                     &(__cpu_model.__cpu_subtype));
686     __cpu_model.__cpu_vendor = VENDOR_INTEL;
687   } else if (Vendor == SIG_AMD) {
688     // Get CPU type.
689     getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2,
690                                   &(__cpu_model.__cpu_type),
691                                   &(__cpu_model.__cpu_subtype));
692     __cpu_model.__cpu_vendor = VENDOR_AMD;
693   } else
694     __cpu_model.__cpu_vendor = VENDOR_OTHER;
695 
696   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
697   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
698   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
699 
700   return 0;
701 }
702 
703 #endif
704