1 //===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library for x86.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "cpu_model.h"
16
17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
18 defined(_M_X64))
19 #error This file is intended only for x86-based targets
20 #endif
21
22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
23
24 #include <assert.h>
25
26 #ifdef _MSC_VER
27 #include <intrin.h>
28 #endif
29
// Vendor identification words. __cpu_indicator_init compares the EBX result of
// CPUID leaf 0 against these; each value is four ASCII characters packed
// little-endian.
enum VendorSignatures {
  SIG_INTEL = 0x756E6547, // "Genu"
  SIG_AMD = 0x68747541,   // "Auth"
};
34
// Values stored in __cpu_model.__cpu_vendor. Zero is deliberately unused:
// __cpu_indicator_init treats a zero vendor field as "not initialised yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last vendor; only used for range checks.
};
41
// Values stored in __cpu_model.__cpu_type. The numeric values are spelled out
// because the order is historical rather than logical; new entries must only
// ever be appended before CPU_TYPE_MAX.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  ZHAOXIN_FAM7H = 16,
  INTEL_SIERRAFOREST = 17,
  INTEL_GRANDRIDGE = 18,
  INTEL_CLEARWATERFOREST = 19,
  CPU_TYPE_MAX = 20 // One past the last type; only used for range checks.
};
64
// Values stored in __cpu_model.__cpu_subtype. As with ProcessorTypes the
// numbering is historical, so every value is written out explicitly; new
// entries must only ever be appended before CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  INTEL_COREI7_ROCKETLAKE = 27,
  ZHAOXIN_FAM7H_LUJIAZUI = 28,
  AMDFAM19H_ZNVER4 = 29,
  INTEL_COREI7_GRANITERAPIDS = 30,
  INTEL_COREI7_GRANITERAPIDS_D = 31,
  INTEL_COREI7_ARROWLAKE = 32,
  INTEL_COREI7_ARROWLAKE_S = 33,
  INTEL_COREI7_PANTHERLAKE = 34,
  CPU_SUBTYPE_MAX = 35 // One past the last subtype; only used for range checks.
};
102
// Feature bit indices. Index F is kept in 32-bit word F / 32 at bit F % 32
// (see setFeature/hasFeature in getAvailableFeatures). Word 0 ends up in
// __cpu_model.__cpu_features[0]; words 1-3 end up in __cpu_features2[0..2].
// Gaps in the numbering are indices this file does not detect.
enum ProcessorFeatures {
  // Word 0 (indices 0-31).
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,

  // Word 1 (indices 32-63).
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,
  FEATURE_CMPXCHG16B = 46,
  FEATURE_F16C = 49,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM = 55,
  FEATURE_WP = 56,
  FEATURE_LZCNT = 57,
  FEATURE_MOVBE = 58,

  // Word 2 (indices 64-95) and word 3 (indices 96-127).
  FEATURE_AVX512FP16 = 94,
  FEATURE_X86_64_BASELINE = 95,
  FEATURE_X86_64_V2 = 96,
  FEATURE_X86_64_V3 = 97,
  FEATURE_X86_64_V4 = 98,
  CPU_FEATURE_MAX = 99 // One past the last feature index.
};
158
// Reports whether the CPUID instruction may be executed.
//
// The i386 probe was copied from clang's cpuid.h (__get_cpuid_max) and was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. It flips bit 0x00200000 of EFLAGS and checks whether the change
// sticks; if it does not, CPUID is reported as unsupported. Every other
// configuration answers true without probing.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int FlagToggled;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  return FlagToggled != 0;
#else
  return true;
#endif
}
190
191 // This code is copied from lib/Support/Host.cpp.
192 // Changes to either file should be mirrored in the other.
193
/// getX86CpuIDAndInfo - Execute CPUID with EAX = \p value and store the
/// resulting EAX, EBX, ECX and EDX through the four output pointers. Returns
/// false on success. Returns true, leaving the outputs untouched, when this
/// build has no way of running cpuid.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually by
  // parking rbx in rsi around the instruction; EBX's result comes back in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
231
/// getX86CpuIDAndInfoEx - Execute CPUID with EAX = \p value and ECX =
/// \p subleaf and store the resulting EAX, EBX, ECX and EDX through the four
/// output pointers. Returns false on success. Returns true, leaving the
/// outputs untouched, when this build has no way of running cpuid.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually by
  // parking rbx in rsi around the instruction; EBX's result comes back in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
270
// Read extended control register 0 (XCR0) via XGETBV with ECX = 0. Used to
// detect features such as AVX. The low 32 bits go to *rEAX and the high 32
// bits to *rEDX. Returns false on success, or true (outputs untouched) when
// this build has no way of issuing the instruction.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic because older
  // assemblers do not include support for it and there is no easy way to
  // conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)XCR0;
  *rEDX = (unsigned)(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
288
// Decode the family and model numbers from the EAX value of CPUID leaf 1.
//
// The base family (bits 8-11) and base model (bits 4-7) are always used. The
// extended family (bits 20-27) is added only when the base family is 0xF, and
// the extended model (bits 16-19) supplies the high nibble of the model only
// when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
301
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)302 static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
303 unsigned Model,
304 const unsigned *Features,
305 unsigned *Type,
306 unsigned *Subtype) {
307 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
308
309 // We select CPU strings to match the code in Host.cpp, but we don't use them
310 // in compiler-rt.
311 const char *CPU = 0;
312
313 switch (Family) {
314 case 6:
315 switch (Model) {
316 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
317 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
318 // mobile processor, Intel Core 2 Extreme processor, Intel
319 // Pentium Dual-Core processor, Intel Xeon processor, model
320 // 0Fh. All processors are manufactured using the 65 nm process.
321 case 0x16: // Intel Celeron processor model 16h. All processors are
322 // manufactured using the 65 nm process
323 CPU = "core2";
324 *Type = INTEL_CORE2;
325 break;
326 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
327 // 17h. All processors are manufactured using the 45 nm process.
328 //
329 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
330 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
331 // the 45 nm process.
332 CPU = "penryn";
333 *Type = INTEL_CORE2;
334 break;
335 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
336 // processors are manufactured using the 45 nm process.
337 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
338 // As found in a Summer 2010 model iMac.
339 case 0x1f:
340 case 0x2e: // Nehalem EX
341 CPU = "nehalem";
342 *Type = INTEL_COREI7;
343 *Subtype = INTEL_COREI7_NEHALEM;
344 break;
345 case 0x25: // Intel Core i7, laptop version.
346 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
347 // processors are manufactured using the 32 nm process.
348 case 0x2f: // Westmere EX
349 CPU = "westmere";
350 *Type = INTEL_COREI7;
351 *Subtype = INTEL_COREI7_WESTMERE;
352 break;
353 case 0x2a: // Intel Core i7 processor. All processors are manufactured
354 // using the 32 nm process.
355 case 0x2d:
356 CPU = "sandybridge";
357 *Type = INTEL_COREI7;
358 *Subtype = INTEL_COREI7_SANDYBRIDGE;
359 break;
360 case 0x3a:
361 case 0x3e: // Ivy Bridge EP
362 CPU = "ivybridge";
363 *Type = INTEL_COREI7;
364 *Subtype = INTEL_COREI7_IVYBRIDGE;
365 break;
366
367 // Haswell:
368 case 0x3c:
369 case 0x3f:
370 case 0x45:
371 case 0x46:
372 CPU = "haswell";
373 *Type = INTEL_COREI7;
374 *Subtype = INTEL_COREI7_HASWELL;
375 break;
376
377 // Broadwell:
378 case 0x3d:
379 case 0x47:
380 case 0x4f:
381 case 0x56:
382 CPU = "broadwell";
383 *Type = INTEL_COREI7;
384 *Subtype = INTEL_COREI7_BROADWELL;
385 break;
386
387 // Skylake:
388 case 0x4e: // Skylake mobile
389 case 0x5e: // Skylake desktop
390 case 0x8e: // Kaby Lake mobile
391 case 0x9e: // Kaby Lake desktop
392 case 0xa5: // Comet Lake-H/S
393 case 0xa6: // Comet Lake-U
394 CPU = "skylake";
395 *Type = INTEL_COREI7;
396 *Subtype = INTEL_COREI7_SKYLAKE;
397 break;
398
399 // Rocketlake:
400 case 0xa7:
401 CPU = "rocketlake";
402 *Type = INTEL_COREI7;
403 *Subtype = INTEL_COREI7_ROCKETLAKE;
404 break;
405
406 // Skylake Xeon:
407 case 0x55:
408 *Type = INTEL_COREI7;
409 if (testFeature(FEATURE_AVX512BF16)) {
410 CPU = "cooperlake";
411 *Subtype = INTEL_COREI7_COOPERLAKE;
412 } else if (testFeature(FEATURE_AVX512VNNI)) {
413 CPU = "cascadelake";
414 *Subtype = INTEL_COREI7_CASCADELAKE;
415 } else {
416 CPU = "skylake-avx512";
417 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
418 }
419 break;
420
421 // Cannonlake:
422 case 0x66:
423 CPU = "cannonlake";
424 *Type = INTEL_COREI7;
425 *Subtype = INTEL_COREI7_CANNONLAKE;
426 break;
427
428 // Icelake:
429 case 0x7d:
430 case 0x7e:
431 CPU = "icelake-client";
432 *Type = INTEL_COREI7;
433 *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
434 break;
435
436 // Tigerlake:
437 case 0x8c:
438 case 0x8d:
439 CPU = "tigerlake";
440 *Type = INTEL_COREI7;
441 *Subtype = INTEL_COREI7_TIGERLAKE;
442 break;
443
444 // Alderlake:
445 case 0x97:
446 case 0x9a:
447 // Raptorlake:
448 case 0xb7:
449 case 0xba:
450 case 0xbf:
451 // Meteorlake:
452 case 0xaa:
453 case 0xac:
454 // Gracemont:
455 case 0xbe:
456 CPU = "alderlake";
457 *Type = INTEL_COREI7;
458 *Subtype = INTEL_COREI7_ALDERLAKE;
459 break;
460
461 // Arrowlake:
462 case 0xc5:
463 CPU = "arrowlake";
464 *Type = INTEL_COREI7;
465 *Subtype = INTEL_COREI7_ARROWLAKE;
466 break;
467
468 // Arrowlake S:
469 case 0xc6:
470 // Lunarlake:
471 case 0xbd:
472 CPU = "arrowlake-s";
473 *Type = INTEL_COREI7;
474 *Subtype = INTEL_COREI7_ARROWLAKE_S;
475 break;
476
477 // Pantherlake:
478 case 0xcc:
479 CPU = "pantherlake";
480 *Type = INTEL_COREI7;
481 *Subtype = INTEL_COREI7_PANTHERLAKE;
482 break;
483
484 // Icelake Xeon:
485 case 0x6a:
486 case 0x6c:
487 CPU = "icelake-server";
488 *Type = INTEL_COREI7;
489 *Subtype = INTEL_COREI7_ICELAKE_SERVER;
490 break;
491
492 // Emerald Rapids:
493 case 0xcf:
494 // Sapphire Rapids:
495 case 0x8f:
496 CPU = "sapphirerapids";
497 *Type = INTEL_COREI7;
498 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
499 break;
500
501 // Granite Rapids:
502 case 0xad:
503 CPU = "graniterapids";
504 *Type = INTEL_COREI7;
505 *Subtype = INTEL_COREI7_GRANITERAPIDS;
506 break;
507
508 // Granite Rapids D:
509 case 0xae:
510 CPU = "graniterapids-d";
511 *Type = INTEL_COREI7;
512 *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
513 break;
514
515 case 0x1c: // Most 45 nm Intel Atom processors
516 case 0x26: // 45 nm Atom Lincroft
517 case 0x27: // 32 nm Atom Medfield
518 case 0x35: // 32 nm Atom Midview
519 case 0x36: // 32 nm Atom Midview
520 CPU = "bonnell";
521 *Type = INTEL_BONNELL;
522 break;
523
524 // Atom Silvermont codes from the Intel software optimization guide.
525 case 0x37:
526 case 0x4a:
527 case 0x4d:
528 case 0x5a:
529 case 0x5d:
530 case 0x4c: // really airmont
531 CPU = "silvermont";
532 *Type = INTEL_SILVERMONT;
533 break;
534 // Goldmont:
535 case 0x5c: // Apollo Lake
536 case 0x5f: // Denverton
537 CPU = "goldmont";
538 *Type = INTEL_GOLDMONT;
539 break; // "goldmont"
540 case 0x7a:
541 CPU = "goldmont-plus";
542 *Type = INTEL_GOLDMONT_PLUS;
543 break;
544 case 0x86:
545 case 0x8a: // Lakefield
546 case 0x96: // Elkhart Lake
547 case 0x9c: // Jasper Lake
548 CPU = "tremont";
549 *Type = INTEL_TREMONT;
550 break;
551
552 // Sierraforest:
553 case 0xaf:
554 CPU = "sierraforest";
555 *Type = INTEL_SIERRAFOREST;
556 break;
557
558 // Grandridge:
559 case 0xb6:
560 CPU = "grandridge";
561 *Type = INTEL_GRANDRIDGE;
562 break;
563
564 // Clearwaterforest:
565 case 0xdd:
566 CPU = "clearwaterforest";
567 *Type = INTEL_COREI7;
568 *Subtype = INTEL_CLEARWATERFOREST;
569 break;
570
571 case 0x57:
572 CPU = "knl";
573 *Type = INTEL_KNL;
574 break;
575
576 case 0x85:
577 CPU = "knm";
578 *Type = INTEL_KNM;
579 break;
580
581 default: // Unknown family 6 CPU.
582 break;
583 }
584 break;
585 default:
586 break; // Unknown.
587 }
588
589 return CPU;
590 }
591
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)592 static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
593 unsigned Model,
594 const unsigned *Features,
595 unsigned *Type,
596 unsigned *Subtype) {
597 // We select CPU strings to match the code in Host.cpp, but we don't use them
598 // in compiler-rt.
599 const char *CPU = 0;
600
601 switch (Family) {
602 case 16:
603 CPU = "amdfam10";
604 *Type = AMDFAM10H;
605 switch (Model) {
606 case 2:
607 *Subtype = AMDFAM10H_BARCELONA;
608 break;
609 case 4:
610 *Subtype = AMDFAM10H_SHANGHAI;
611 break;
612 case 8:
613 *Subtype = AMDFAM10H_ISTANBUL;
614 break;
615 }
616 break;
617 case 20:
618 CPU = "btver1";
619 *Type = AMD_BTVER1;
620 break;
621 case 21:
622 CPU = "bdver1";
623 *Type = AMDFAM15H;
624 if (Model >= 0x60 && Model <= 0x7f) {
625 CPU = "bdver4";
626 *Subtype = AMDFAM15H_BDVER4;
627 break; // 60h-7Fh: Excavator
628 }
629 if (Model >= 0x30 && Model <= 0x3f) {
630 CPU = "bdver3";
631 *Subtype = AMDFAM15H_BDVER3;
632 break; // 30h-3Fh: Steamroller
633 }
634 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
635 CPU = "bdver2";
636 *Subtype = AMDFAM15H_BDVER2;
637 break; // 02h, 10h-1Fh: Piledriver
638 }
639 if (Model <= 0x0f) {
640 *Subtype = AMDFAM15H_BDVER1;
641 break; // 00h-0Fh: Bulldozer
642 }
643 break;
644 case 22:
645 CPU = "btver2";
646 *Type = AMD_BTVER2;
647 break;
648 case 23:
649 CPU = "znver1";
650 *Type = AMDFAM17H;
651 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
652 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
653 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
654 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
655 (Model >= 0xa0 && Model <= 0xaf)) {
656 // Family 17h Models 30h-3Fh (Starship) Zen 2
657 // Family 17h Models 47h (Cardinal) Zen 2
658 // Family 17h Models 60h-67h (Renoir) Zen 2
659 // Family 17h Models 68h-6Fh (Lucienne) Zen 2
660 // Family 17h Models 70h-7Fh (Matisse) Zen 2
661 // Family 17h Models 84h-87h (ProjectX) Zen 2
662 // Family 17h Models 90h-97h (VanGogh) Zen 2
663 // Family 17h Models 98h-9Fh (Mero) Zen 2
664 // Family 17h Models A0h-AFh (Mendocino) Zen 2
665 CPU = "znver2";
666 *Subtype = AMDFAM17H_ZNVER2;
667 break;
668 }
669 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
670 // Family 17h Models 10h-1Fh (Raven1) Zen
671 // Family 17h Models 10h-1Fh (Picasso) Zen+
672 // Family 17h Models 20h-2Fh (Raven2 x86) Zen
673 *Subtype = AMDFAM17H_ZNVER1;
674 break;
675 }
676 break;
677 case 25:
678 CPU = "znver3";
679 *Type = AMDFAM19H;
680 if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
681 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
682 (Model >= 0x50 && Model <= 0x5f)) {
683 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
684 // Family 19h Models 20h-2Fh (Vermeer) Zen 3
685 // Family 19h Models 30h-3Fh (Badami) Zen 3
686 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
687 // Family 19h Models 50h-5Fh (Cezanne) Zen 3
688 *Subtype = AMDFAM19H_ZNVER3;
689 break;
690 }
691 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
692 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
693 (Model >= 0xa0 && Model <= 0xaf)) {
694 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
695 // Family 19h Models 60h-6Fh (Raphael) Zen 4
696 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
697 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
698 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
699 CPU = "znver4";
700 *Subtype = AMDFAM19H_ZNVER4;
701 break; // "znver4"
702 }
703 break; // family 19h
704 default:
705 break; // Unknown AMD CPU.
706 }
707
708 return CPU;
709 }
710
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)711 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
712 unsigned *Features) {
713 unsigned EAX = 0, EBX = 0;
714
715 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
716 #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
717
718 if ((EDX >> 15) & 1)
719 setFeature(FEATURE_CMOV);
720 if ((EDX >> 23) & 1)
721 setFeature(FEATURE_MMX);
722 if ((EDX >> 25) & 1)
723 setFeature(FEATURE_SSE);
724 if ((EDX >> 26) & 1)
725 setFeature(FEATURE_SSE2);
726
727 if ((ECX >> 0) & 1)
728 setFeature(FEATURE_SSE3);
729 if ((ECX >> 1) & 1)
730 setFeature(FEATURE_PCLMUL);
731 if ((ECX >> 9) & 1)
732 setFeature(FEATURE_SSSE3);
733 if ((ECX >> 12) & 1)
734 setFeature(FEATURE_FMA);
735 if ((ECX >> 13) & 1)
736 setFeature(FEATURE_CMPXCHG16B);
737 if ((ECX >> 19) & 1)
738 setFeature(FEATURE_SSE4_1);
739 if ((ECX >> 20) & 1)
740 setFeature(FEATURE_SSE4_2);
741 if ((ECX >> 22) & 1)
742 setFeature(FEATURE_MOVBE);
743 if ((ECX >> 23) & 1)
744 setFeature(FEATURE_POPCNT);
745 if ((ECX >> 25) & 1)
746 setFeature(FEATURE_AES);
747 if ((ECX >> 29) & 1)
748 setFeature(FEATURE_F16C);
749
750 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
751 // indicates that the AVX registers will be saved and restored on context
752 // switch, then we have full AVX support.
753 const unsigned AVXBits = (1 << 27) | (1 << 28);
754 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
755 ((EAX & 0x6) == 0x6);
756 #if defined(__APPLE__)
757 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
758 // save the AVX512 context if we use AVX512 instructions, even the bit is not
759 // set right now.
760 bool HasAVX512Save = true;
761 #else
762 // AVX512 requires additional context to be saved by the OS.
763 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
764 #endif
765
766 if (HasAVX)
767 setFeature(FEATURE_AVX);
768
769 bool HasLeaf7 =
770 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
771
772 if (HasLeaf7 && ((EBX >> 3) & 1))
773 setFeature(FEATURE_BMI);
774 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
775 setFeature(FEATURE_AVX2);
776 if (HasLeaf7 && ((EBX >> 8) & 1))
777 setFeature(FEATURE_BMI2);
778 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
779 setFeature(FEATURE_AVX512F);
780 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
781 setFeature(FEATURE_AVX512DQ);
782 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
783 setFeature(FEATURE_AVX512IFMA);
784 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
785 setFeature(FEATURE_AVX512PF);
786 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
787 setFeature(FEATURE_AVX512ER);
788 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
789 setFeature(FEATURE_AVX512CD);
790 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
791 setFeature(FEATURE_AVX512BW);
792 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
793 setFeature(FEATURE_AVX512VL);
794
795 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
796 setFeature(FEATURE_AVX512VBMI);
797 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
798 setFeature(FEATURE_AVX512VBMI2);
799 if (HasLeaf7 && ((ECX >> 8) & 1))
800 setFeature(FEATURE_GFNI);
801 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
802 setFeature(FEATURE_VPCLMULQDQ);
803 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
804 setFeature(FEATURE_AVX512VNNI);
805 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
806 setFeature(FEATURE_AVX512BITALG);
807 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
808 setFeature(FEATURE_AVX512VPOPCNTDQ);
809
810 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
811 setFeature(FEATURE_AVX5124VNNIW);
812 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
813 setFeature(FEATURE_AVX5124FMAPS);
814 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
815 setFeature(FEATURE_AVX512VP2INTERSECT);
816 if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
817 setFeature(FEATURE_AVX512FP16);
818
819 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
820 // return all 0s for invalid subleaves so check the limit.
821 bool HasLeaf7Subleaf1 =
822 HasLeaf7 && EAX >= 1 &&
823 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
824 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
825 setFeature(FEATURE_AVX512BF16);
826
827 unsigned MaxExtLevel;
828 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
829
830 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
831 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
832 if (HasExtLeaf1) {
833 if (ECX & 1)
834 setFeature(FEATURE_LAHF_LM);
835 if ((ECX >> 5) & 1)
836 setFeature(FEATURE_LZCNT);
837 if (((ECX >> 6) & 1))
838 setFeature(FEATURE_SSE4_A);
839 if (((ECX >> 11) & 1))
840 setFeature(FEATURE_XOP);
841 if (((ECX >> 16) & 1))
842 setFeature(FEATURE_FMA4);
843 if (((EDX >> 29) & 1))
844 setFeature(FEATURE_LM);
845 }
846
847 if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
848 setFeature(FEATURE_X86_64_BASELINE);
849 if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
850 hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
851 setFeature(FEATURE_X86_64_V2);
852 if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
853 hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
854 hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
855 hasFeature(FEATURE_MOVBE)) {
856 setFeature(FEATURE_X86_64_V3);
857 if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
858 hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
859 setFeature(FEATURE_X86_64_V4);
860 }
861 }
862 }
863
864 #undef hasFeature
865 #undef setFeature
866 }
867
868 #ifndef _WIN32
869 __attribute__((visibility("hidden")))
870 #endif
871 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
872
873 #ifndef _WIN32
874 __attribute__((visibility("hidden")))
875 #endif
876 struct __processor_model {
877 unsigned int __cpu_vendor;
878 unsigned int __cpu_type;
879 unsigned int __cpu_subtype;
880 unsigned int __cpu_features[1];
881 } __cpu_model = {0, 0, 0, {0}};
882
883 #ifndef _WIN32
884 __attribute__((visibility("hidden")))
885 #endif
886 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
887
888 // A constructor function that is sets __cpu_model and __cpu_features2 with
889 // the right values. This needs to run only once. This constructor is
890 // given the highest priority and it should run before constructors without
891 // the priority set. However, it still runs after ifunc initializers and
892 // needs to be called explicitly there.
893
__cpu_indicator_init(void)894 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
895 unsigned EAX, EBX, ECX, EDX;
896 unsigned MaxLeaf = 5;
897 unsigned Vendor;
898 unsigned Model, Family;
899 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
900 static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
901 static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
902
903 // This function needs to run just once.
904 if (__cpu_model.__cpu_vendor)
905 return 0;
906
907 if (!isCpuIdSupported() ||
908 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
909 __cpu_model.__cpu_vendor = VENDOR_OTHER;
910 return -1;
911 }
912
913 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
914 detectX86FamilyModel(EAX, &Family, &Model);
915
916 // Find available features.
917 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
918
919 __cpu_model.__cpu_features[0] = Features[0];
920 __cpu_features2[0] = Features[1];
921 __cpu_features2[1] = Features[2];
922 __cpu_features2[2] = Features[3];
923
924 if (Vendor == SIG_INTEL) {
925 // Get CPU type.
926 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
927 &(__cpu_model.__cpu_type),
928 &(__cpu_model.__cpu_subtype));
929 __cpu_model.__cpu_vendor = VENDOR_INTEL;
930 } else if (Vendor == SIG_AMD) {
931 // Get CPU type.
932 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
933 &(__cpu_model.__cpu_type),
934 &(__cpu_model.__cpu_subtype));
935 __cpu_model.__cpu_vendor = VENDOR_AMD;
936 } else
937 __cpu_model.__cpu_vendor = VENDOR_OTHER;
938
939 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
940 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
941 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
942
943 return 0;
944 }
945 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
946