1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is based on LLVM's lib/Support/Host.cpp. 10 // It implements the operating system Host concept and builtin 11 // __cpu_model for the compiler_rt library, for x86 only. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ 16 defined(_M_X64)) && \ 17 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) 18 19 #include <assert.h> 20 21 #define bool int 22 #define true 1 23 #define false 0 24 25 #ifdef _MSC_VER 26 #include <intrin.h> 27 #endif 28 29 #ifndef __has_attribute 30 #define __has_attribute(attr) 0 31 #endif 32 33 enum VendorSignatures { 34 SIG_INTEL = 0x756e6547, // Genu 35 SIG_AMD = 0x68747541, // Auth 36 }; 37 38 enum ProcessorVendors { 39 VENDOR_INTEL = 1, 40 VENDOR_AMD, 41 VENDOR_OTHER, 42 VENDOR_MAX 43 }; 44 45 enum ProcessorTypes { 46 INTEL_BONNELL = 1, 47 INTEL_CORE2, 48 INTEL_COREI7, 49 AMDFAM10H, 50 AMDFAM15H, 51 INTEL_SILVERMONT, 52 INTEL_KNL, 53 AMD_BTVER1, 54 AMD_BTVER2, 55 AMDFAM17H, 56 INTEL_KNM, 57 INTEL_GOLDMONT, 58 INTEL_GOLDMONT_PLUS, 59 INTEL_TREMONT, 60 CPU_TYPE_MAX 61 }; 62 63 enum ProcessorSubtypes { 64 INTEL_COREI7_NEHALEM = 1, 65 INTEL_COREI7_WESTMERE, 66 INTEL_COREI7_SANDYBRIDGE, 67 AMDFAM10H_BARCELONA, 68 AMDFAM10H_SHANGHAI, 69 AMDFAM10H_ISTANBUL, 70 AMDFAM15H_BDVER1, 71 AMDFAM15H_BDVER2, 72 AMDFAM15H_BDVER3, 73 AMDFAM15H_BDVER4, 74 AMDFAM17H_ZNVER1, 75 INTEL_COREI7_IVYBRIDGE, 76 INTEL_COREI7_HASWELL, 77 INTEL_COREI7_BROADWELL, 78 INTEL_COREI7_SKYLAKE, 79 INTEL_COREI7_SKYLAKE_AVX512, 80 INTEL_COREI7_CANNONLAKE, 81 INTEL_COREI7_ICELAKE_CLIENT, 82 INTEL_COREI7_ICELAKE_SERVER, 83 AMDFAM17H_ZNVER2, 84 INTEL_COREI7_CASCADELAKE, 85 CPU_SUBTYPE_MAX 86 }; 87 88 enum ProcessorFeatures { 89 FEATURE_CMOV = 0, 90 FEATURE_MMX, 91 FEATURE_POPCNT, 92 FEATURE_SSE, 93 FEATURE_SSE2, 94 FEATURE_SSE3, 95 FEATURE_SSSE3, 96 FEATURE_SSE4_1, 97 FEATURE_SSE4_2, 98 FEATURE_AVX, 99 FEATURE_AVX2, 100 FEATURE_SSE4_A, 101 FEATURE_FMA4, 102 FEATURE_XOP, 103 FEATURE_FMA, 104 FEATURE_AVX512F, 105 FEATURE_BMI, 106 FEATURE_BMI2, 107 FEATURE_AES, 108 FEATURE_PCLMUL, 109 FEATURE_AVX512VL, 110 FEATURE_AVX512BW, 111 FEATURE_AVX512DQ, 112 FEATURE_AVX512CD, 113 FEATURE_AVX512ER, 114 FEATURE_AVX512PF, 115 FEATURE_AVX512VBMI, 116 FEATURE_AVX512IFMA, 117 FEATURE_AVX5124VNNIW, 118 FEATURE_AVX5124FMAPS, 119 FEATURE_AVX512VPOPCNTDQ, 120 FEATURE_AVX512VBMI2, 121 FEATURE_GFNI, 122 FEATURE_VPCLMULQDQ, 123 FEATURE_AVX512VNNI, 124 FEATURE_AVX512BITALG, 125 FEATURE_AVX512BF16 126 }; 127 128 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 129 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 130 // support. Consequently, for i386, the presence of CPUID is checked first 131 // via the corresponding eflags bit. 132 static bool isCpuIdSupported() { 133 #if defined(__GNUC__) || defined(__clang__) 134 #if defined(__i386__) 135 int __cpuid_supported; 136 __asm__(" pushfl\n" 137 " popl %%eax\n" 138 " movl %%eax,%%ecx\n" 139 " xorl $0x00200000,%%eax\n" 140 " pushl %%eax\n" 141 " popfl\n" 142 " pushfl\n" 143 " popl %%eax\n" 144 " movl $0,%0\n" 145 " cmpl %%eax,%%ecx\n" 146 " je 1f\n" 147 " movl $1,%0\n" 148 "1:" 149 : "=r"(__cpuid_supported) 150 : 151 : "eax", "ecx"); 152 if (!__cpuid_supported) 153 return false; 154 #endif 155 return true; 156 #endif 157 return true; 158 } 159 160 // This code is copied from lib/Support/Host.cpp. 161 // Changes to either file should be mirrored in the other. 162 163 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 164 /// the specified arguments. If we can't run cpuid on the host, return true. 165 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 166 unsigned *rECX, unsigned *rEDX) { 167 #if defined(__GNUC__) || defined(__clang__) 168 #if defined(__x86_64__) 169 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 170 // FIXME: should we save this for Clang? 171 __asm__("movq\t%%rbx, %%rsi\n\t" 172 "cpuid\n\t" 173 "xchgq\t%%rbx, %%rsi\n\t" 174 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 175 : "a"(value)); 176 return false; 177 #elif defined(__i386__) 178 __asm__("movl\t%%ebx, %%esi\n\t" 179 "cpuid\n\t" 180 "xchgl\t%%ebx, %%esi\n\t" 181 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 182 : "a"(value)); 183 return false; 184 #else 185 return true; 186 #endif 187 #elif defined(_MSC_VER) 188 // The MSVC intrinsic is portable across x86 and x64. 189 int registers[4]; 190 __cpuid(registers, value); 191 *rEAX = registers[0]; 192 *rEBX = registers[1]; 193 *rECX = registers[2]; 194 *rEDX = registers[3]; 195 return false; 196 #else 197 return true; 198 #endif 199 } 200 201 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 202 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 203 /// return true. 204 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 205 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 206 unsigned *rEDX) { 207 #if defined(__GNUC__) || defined(__clang__) 208 #if defined(__x86_64__) 209 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 210 // FIXME: should we save this for Clang? 211 __asm__("movq\t%%rbx, %%rsi\n\t" 212 "cpuid\n\t" 213 "xchgq\t%%rbx, %%rsi\n\t" 214 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 215 : "a"(value), "c"(subleaf)); 216 return false; 217 #elif defined(__i386__) 218 __asm__("movl\t%%ebx, %%esi\n\t" 219 "cpuid\n\t" 220 "xchgl\t%%ebx, %%esi\n\t" 221 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 222 : "a"(value), "c"(subleaf)); 223 return false; 224 #else 225 return true; 226 #endif 227 #elif defined(_MSC_VER) 228 int registers[4]; 229 __cpuidex(registers, value, subleaf); 230 *rEAX = registers[0]; 231 *rEBX = registers[1]; 232 *rECX = registers[2]; 233 *rEDX = registers[3]; 234 return false; 235 #else 236 return true; 237 #endif 238 } 239 240 // Read control register 0 (XCR0). Used to detect features such as AVX. 241 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 242 #if defined(__GNUC__) || defined(__clang__) 243 // Check xgetbv; this uses a .byte sequence instead of the instruction 244 // directly because older assemblers do not include support for xgetbv and 245 // there is no easy way to conditionally compile based on the assembler used. 246 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 247 return false; 248 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 249 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 250 *rEAX = Result; 251 *rEDX = Result >> 32; 252 return false; 253 #else 254 return true; 255 #endif 256 } 257 258 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 259 unsigned *Model) { 260 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 261 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 262 if (*Family == 6 || *Family == 0xf) { 263 if (*Family == 0xf) 264 // Examine extended family ID if family ID is F. 265 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 266 // Examine extended model ID if family ID is 6 or F. 267 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 268 } 269 } 270 271 static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 272 unsigned Brand_id, 273 unsigned Features, 274 unsigned Features2, unsigned *Type, 275 unsigned *Subtype) { 276 if (Brand_id != 0) 277 return; 278 switch (Family) { 279 case 6: 280 switch (Model) { 281 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 282 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 283 // mobile processor, Intel Core 2 Extreme processor, Intel 284 // Pentium Dual-Core processor, Intel Xeon processor, model 285 // 0Fh. All processors are manufactured using the 65 nm process. 286 case 0x16: // Intel Celeron processor model 16h. All processors are 287 // manufactured using the 65 nm process 288 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 289 // 17h. All processors are manufactured using the 45 nm process. 290 // 291 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 292 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 293 // the 45 nm process. 294 *Type = INTEL_CORE2; // "penryn" 295 break; 296 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 297 // processors are manufactured using the 45 nm process. 298 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 299 // As found in a Summer 2010 model iMac. 300 case 0x1f: 301 case 0x2e: // Nehalem EX 302 *Type = INTEL_COREI7; // "nehalem" 303 *Subtype = INTEL_COREI7_NEHALEM; 304 break; 305 case 0x25: // Intel Core i7, laptop version. 306 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 307 // processors are manufactured using the 32 nm process. 308 case 0x2f: // Westmere EX 309 *Type = INTEL_COREI7; // "westmere" 310 *Subtype = INTEL_COREI7_WESTMERE; 311 break; 312 case 0x2a: // Intel Core i7 processor. All processors are manufactured 313 // using the 32 nm process. 314 case 0x2d: 315 *Type = INTEL_COREI7; //"sandybridge" 316 *Subtype = INTEL_COREI7_SANDYBRIDGE; 317 break; 318 case 0x3a: 319 case 0x3e: // Ivy Bridge EP 320 *Type = INTEL_COREI7; // "ivybridge" 321 *Subtype = INTEL_COREI7_IVYBRIDGE; 322 break; 323 324 // Haswell: 325 case 0x3c: 326 case 0x3f: 327 case 0x45: 328 case 0x46: 329 *Type = INTEL_COREI7; // "haswell" 330 *Subtype = INTEL_COREI7_HASWELL; 331 break; 332 333 // Broadwell: 334 case 0x3d: 335 case 0x47: 336 case 0x4f: 337 case 0x56: 338 *Type = INTEL_COREI7; // "broadwell" 339 *Subtype = INTEL_COREI7_BROADWELL; 340 break; 341 342 // Skylake: 343 case 0x4e: // Skylake mobile 344 case 0x5e: // Skylake desktop 345 case 0x8e: // Kaby Lake mobile 346 case 0x9e: // Kaby Lake desktop 347 *Type = INTEL_COREI7; // "skylake" 348 *Subtype = INTEL_COREI7_SKYLAKE; 349 break; 350 351 // Skylake Xeon: 352 case 0x55: 353 *Type = INTEL_COREI7; 354 if (Features2 & (1 << (FEATURE_AVX512VNNI - 32))) 355 *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake" 356 else 357 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" 358 break; 359 360 // Cannonlake: 361 case 0x66: 362 *Type = INTEL_COREI7; 363 *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" 364 break; 365 366 // Icelake: 367 case 0x7d: 368 case 0x7e: 369 *Type = INTEL_COREI7; 370 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" 371 break; 372 373 // Icelake Xeon: 374 case 0x6a: 375 case 0x6c: 376 *Type = INTEL_COREI7; 377 *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" 378 break; 379 380 case 0x1c: // Most 45 nm Intel Atom processors 381 case 0x26: // 45 nm Atom Lincroft 382 case 0x27: // 32 nm Atom Medfield 383 case 0x35: // 32 nm Atom Midview 384 case 0x36: // 32 nm Atom Midview 385 *Type = INTEL_BONNELL; 386 break; // "bonnell" 387 388 // Atom Silvermont codes from the Intel software optimization guide. 389 case 0x37: 390 case 0x4a: 391 case 0x4d: 392 case 0x5a: 393 case 0x5d: 394 case 0x4c: // really airmont 395 *Type = INTEL_SILVERMONT; 396 break; // "silvermont" 397 // Goldmont: 398 case 0x5c: // Apollo Lake 399 case 0x5f: // Denverton 400 *Type = INTEL_GOLDMONT; 401 break; // "goldmont" 402 case 0x7a: 403 *Type = INTEL_GOLDMONT_PLUS; 404 break; 405 case 0x86: 406 *Type = INTEL_TREMONT; 407 break; 408 409 case 0x57: 410 *Type = INTEL_KNL; // knl 411 break; 412 413 case 0x85: 414 *Type = INTEL_KNM; // knm 415 break; 416 417 default: // Unknown family 6 CPU. 418 break; 419 } 420 break; 421 default: 422 break; // Unknown. 423 } 424 } 425 426 static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 427 unsigned Features, unsigned Features2, 428 unsigned *Type, unsigned *Subtype) { 429 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There 430 // appears to be no way to generate the wide variety of AMD-specific targets 431 // from the information returned from CPUID. 432 switch (Family) { 433 case 16: 434 *Type = AMDFAM10H; // "amdfam10" 435 switch (Model) { 436 case 2: 437 *Subtype = AMDFAM10H_BARCELONA; 438 break; 439 case 4: 440 *Subtype = AMDFAM10H_SHANGHAI; 441 break; 442 case 8: 443 *Subtype = AMDFAM10H_ISTANBUL; 444 break; 445 } 446 break; 447 case 20: 448 *Type = AMD_BTVER1; 449 break; // "btver1"; 450 case 21: 451 *Type = AMDFAM15H; 452 if (Model >= 0x60 && Model <= 0x7f) { 453 *Subtype = AMDFAM15H_BDVER4; 454 break; // "bdver4"; 60h-7Fh: Excavator 455 } 456 if (Model >= 0x30 && Model <= 0x3f) { 457 *Subtype = AMDFAM15H_BDVER3; 458 break; // "bdver3"; 30h-3Fh: Steamroller 459 } 460 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 461 *Subtype = AMDFAM15H_BDVER2; 462 break; // "bdver2"; 02h, 10h-1Fh: Piledriver 463 } 464 if (Model <= 0x0f) { 465 *Subtype = AMDFAM15H_BDVER1; 466 break; // "bdver1"; 00h-0Fh: Bulldozer 467 } 468 break; 469 case 22: 470 *Type = AMD_BTVER2; 471 break; // "btver2" 472 case 23: 473 *Type = AMDFAM17H; 474 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 475 *Subtype = AMDFAM17H_ZNVER2; 476 break; // "znver2"; 30h-3fh, 71h: Zen2 477 } 478 if (Model <= 0x0f) { 479 *Subtype = AMDFAM17H_ZNVER1; 480 break; // "znver1"; 00h-0Fh: Zen1 481 } 482 break; 483 default: 484 break; // "generic" 485 } 486 } 487 488 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 489 unsigned *FeaturesOut, 490 unsigned *Features2Out) { 491 unsigned Features = 0; 492 unsigned Features2 = 0; 493 unsigned EAX, EBX; 494 495 #define setFeature(F) \ 496 do { \ 497 if (F < 32) \ 498 Features |= 1U << (F & 0x1f); \ 499 else if (F < 64) \ 500 Features2 |= 1U << ((F - 32) & 0x1f); \ 501 } while (0) 502 503 if ((EDX >> 15) & 1) 504 setFeature(FEATURE_CMOV); 505 if ((EDX >> 23) & 1) 506 setFeature(FEATURE_MMX); 507 if ((EDX >> 25) & 1) 508 setFeature(FEATURE_SSE); 509 if ((EDX >> 26) & 1) 510 setFeature(FEATURE_SSE2); 511 512 if ((ECX >> 0) & 1) 513 setFeature(FEATURE_SSE3); 514 if ((ECX >> 1) & 1) 515 setFeature(FEATURE_PCLMUL); 516 if ((ECX >> 9) & 1) 517 setFeature(FEATURE_SSSE3); 518 if ((ECX >> 12) & 1) 519 setFeature(FEATURE_FMA); 520 if ((ECX >> 19) & 1) 521 setFeature(FEATURE_SSE4_1); 522 if ((ECX >> 20) & 1) 523 setFeature(FEATURE_SSE4_2); 524 if ((ECX >> 23) & 1) 525 setFeature(FEATURE_POPCNT); 526 if ((ECX >> 25) & 1) 527 setFeature(FEATURE_AES); 528 529 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 530 // indicates that the AVX registers will be saved and restored on context 531 // switch, then we have full AVX support. 532 const unsigned AVXBits = (1 << 27) | (1 << 28); 533 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 534 ((EAX & 0x6) == 0x6); 535 #if defined(__APPLE__) 536 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 537 // save the AVX512 context if we use AVX512 instructions, even the bit is not 538 // set right now. 539 bool HasAVX512Save = true; 540 #else 541 // AVX512 requires additional context to be saved by the OS. 542 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 543 #endif 544 545 if (HasAVX) 546 setFeature(FEATURE_AVX); 547 548 bool HasLeaf7 = 549 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 550 551 if (HasLeaf7 && ((EBX >> 3) & 1)) 552 setFeature(FEATURE_BMI); 553 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 554 setFeature(FEATURE_AVX2); 555 if (HasLeaf7 && ((EBX >> 8) & 1)) 556 setFeature(FEATURE_BMI2); 557 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 558 setFeature(FEATURE_AVX512F); 559 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 560 setFeature(FEATURE_AVX512DQ); 561 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 562 setFeature(FEATURE_AVX512IFMA); 563 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 564 setFeature(FEATURE_AVX512PF); 565 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 566 setFeature(FEATURE_AVX512ER); 567 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 568 setFeature(FEATURE_AVX512CD); 569 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 570 setFeature(FEATURE_AVX512BW); 571 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 572 setFeature(FEATURE_AVX512VL); 573 574 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 575 setFeature(FEATURE_AVX512VBMI); 576 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 577 setFeature(FEATURE_AVX512VBMI2); 578 if (HasLeaf7 && ((ECX >> 8) & 1)) 579 setFeature(FEATURE_GFNI); 580 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 581 setFeature(FEATURE_VPCLMULQDQ); 582 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 583 setFeature(FEATURE_AVX512VNNI); 584 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 585 setFeature(FEATURE_AVX512BITALG); 586 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 587 setFeature(FEATURE_AVX512VPOPCNTDQ); 588 589 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 590 setFeature(FEATURE_AVX5124VNNIW); 591 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 592 setFeature(FEATURE_AVX5124FMAPS); 593 594 bool HasLeaf7Subleaf1 = 595 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 596 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 597 setFeature(FEATURE_AVX512BF16); 598 599 unsigned MaxExtLevel; 600 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 601 602 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 603 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 604 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 605 setFeature(FEATURE_SSE4_A); 606 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 607 setFeature(FEATURE_XOP); 608 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 609 setFeature(FEATURE_FMA4); 610 611 *FeaturesOut = Features; 612 *Features2Out = Features2; 613 #undef setFeature 614 } 615 616 #if defined(HAVE_INIT_PRIORITY) 617 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101)) 618 #elif __has_attribute(__constructor__) 619 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) 620 #else 621 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that 622 // this runs during initialization. 623 #define CONSTRUCTOR_ATTRIBUTE 624 #endif 625 626 #ifndef _WIN32 627 __attribute__((visibility("hidden"))) 628 #endif 629 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; 630 631 #ifndef _WIN32 632 __attribute__((visibility("hidden"))) 633 #endif 634 struct __processor_model { 635 unsigned int __cpu_vendor; 636 unsigned int __cpu_type; 637 unsigned int __cpu_subtype; 638 unsigned int __cpu_features[1]; 639 } __cpu_model = {0, 0, 0, {0}}; 640 641 #ifndef _WIN32 642 __attribute__((visibility("hidden"))) 643 #endif 644 unsigned int __cpu_features2; 645 646 // A constructor function that is sets __cpu_model and __cpu_features2 with 647 // the right values. This needs to run only once. This constructor is 648 // given the highest priority and it should run before constructors without 649 // the priority set. However, it still runs after ifunc initializers and 650 // needs to be called explicitly there. 651 652 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { 653 unsigned EAX, EBX, ECX, EDX; 654 unsigned MaxLeaf = 5; 655 unsigned Vendor; 656 unsigned Model, Family, Brand_id; 657 unsigned Features = 0; 658 unsigned Features2 = 0; 659 660 // This function needs to run just once. 661 if (__cpu_model.__cpu_vendor) 662 return 0; 663 664 if (!isCpuIdSupported()) 665 return -1; 666 667 // Assume cpuid insn present. Run in level 0 to get vendor id. 668 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { 669 __cpu_model.__cpu_vendor = VENDOR_OTHER; 670 return -1; 671 } 672 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 673 detectX86FamilyModel(EAX, &Family, &Model); 674 Brand_id = EBX & 0xff; 675 676 // Find available features. 677 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); 678 __cpu_model.__cpu_features[0] = Features; 679 __cpu_features2 = Features2; 680 681 if (Vendor == SIG_INTEL) { 682 // Get CPU type. 683 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, 684 Features2, &(__cpu_model.__cpu_type), 685 &(__cpu_model.__cpu_subtype)); 686 __cpu_model.__cpu_vendor = VENDOR_INTEL; 687 } else if (Vendor == SIG_AMD) { 688 // Get CPU type. 689 getAMDProcessorTypeAndSubtype(Family, Model, Features, Features2, 690 &(__cpu_model.__cpu_type), 691 &(__cpu_model.__cpu_subtype)); 692 __cpu_model.__cpu_vendor = VENDOR_AMD; 693 } else 694 __cpu_model.__cpu_vendor = VENDOR_OTHER; 695 696 assert(__cpu_model.__cpu_vendor < VENDOR_MAX); 697 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); 698 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); 699 700 return 0; 701 } 702 703 #endif 704