1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is based on LLVM's lib/Support/Host.cpp. 10 // It implements the operating system Host concept and builtin 11 // __cpu_model for the compiler_rt library for x86 and 12 // __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #ifndef __has_attribute 17 #define __has_attribute(attr) 0 18 #endif 19 20 #if __has_attribute(constructor) 21 #if __GNUC__ >= 9 22 // Ordinarily init priorities below 101 are disallowed as they are reserved for the 23 // implementation. However, we are the implementation, so silence the diagnostic, 24 // since it doesn't apply to us. 25 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor" 26 #endif 27 // We're choosing init priority 90 to force our constructors to run before any 28 // constructors in the end user application (starting at priority 101). This value 29 // matches the libgcc choice for the same functions. 30 #define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) 31 #else 32 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that 33 // this runs during initialization. 34 #define CONSTRUCTOR_ATTRIBUTE 35 #endif 36 37 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ 38 defined(_M_X64)) && \ 39 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) 40 41 #include <assert.h> 42 43 #define bool int 44 #define true 1 45 #define false 0 46 47 #ifdef _MSC_VER 48 #include <intrin.h> 49 #endif 50 51 enum VendorSignatures { 52 SIG_INTEL = 0x756e6547, // Genu 53 SIG_AMD = 0x68747541, // Auth 54 }; 55 56 enum ProcessorVendors { 57 VENDOR_INTEL = 1, 58 VENDOR_AMD, 59 VENDOR_OTHER, 60 VENDOR_MAX 61 }; 62 63 enum ProcessorTypes { 64 INTEL_BONNELL = 1, 65 INTEL_CORE2, 66 INTEL_COREI7, 67 AMDFAM10H, 68 AMDFAM15H, 69 INTEL_SILVERMONT, 70 INTEL_KNL, 71 AMD_BTVER1, 72 AMD_BTVER2, 73 AMDFAM17H, 74 INTEL_KNM, 75 INTEL_GOLDMONT, 76 INTEL_GOLDMONT_PLUS, 77 INTEL_TREMONT, 78 AMDFAM19H, 79 ZHAOXIN_FAM7H, 80 INTEL_SIERRAFOREST, 81 INTEL_GRANDRIDGE, 82 CPU_TYPE_MAX 83 }; 84 85 enum ProcessorSubtypes { 86 INTEL_COREI7_NEHALEM = 1, 87 INTEL_COREI7_WESTMERE, 88 INTEL_COREI7_SANDYBRIDGE, 89 AMDFAM10H_BARCELONA, 90 AMDFAM10H_SHANGHAI, 91 AMDFAM10H_ISTANBUL, 92 AMDFAM15H_BDVER1, 93 AMDFAM15H_BDVER2, 94 AMDFAM15H_BDVER3, 95 AMDFAM15H_BDVER4, 96 AMDFAM17H_ZNVER1, 97 INTEL_COREI7_IVYBRIDGE, 98 INTEL_COREI7_HASWELL, 99 INTEL_COREI7_BROADWELL, 100 INTEL_COREI7_SKYLAKE, 101 INTEL_COREI7_SKYLAKE_AVX512, 102 INTEL_COREI7_CANNONLAKE, 103 INTEL_COREI7_ICELAKE_CLIENT, 104 INTEL_COREI7_ICELAKE_SERVER, 105 AMDFAM17H_ZNVER2, 106 INTEL_COREI7_CASCADELAKE, 107 INTEL_COREI7_TIGERLAKE, 108 INTEL_COREI7_COOPERLAKE, 109 INTEL_COREI7_SAPPHIRERAPIDS, 110 INTEL_COREI7_ALDERLAKE, 111 AMDFAM19H_ZNVER3, 112 INTEL_COREI7_ROCKETLAKE, 113 ZHAOXIN_FAM7H_LUJIAZUI, 114 AMDFAM19H_ZNVER4, 115 INTEL_COREI7_GRANITERAPIDS, 116 CPU_SUBTYPE_MAX 117 }; 118 119 enum ProcessorFeatures { 120 FEATURE_CMOV = 0, 121 FEATURE_MMX, 122 FEATURE_POPCNT, 123 FEATURE_SSE, 124 FEATURE_SSE2, 125 FEATURE_SSE3, 126 FEATURE_SSSE3, 127 FEATURE_SSE4_1, 128 FEATURE_SSE4_2, 129 FEATURE_AVX, 130 FEATURE_AVX2, 131 FEATURE_SSE4_A, 132 FEATURE_FMA4, 133 FEATURE_XOP, 134 FEATURE_FMA, 135 FEATURE_AVX512F, 136 FEATURE_BMI, 137 FEATURE_BMI2, 138 FEATURE_AES, 139 FEATURE_PCLMUL, 140 FEATURE_AVX512VL, 141 FEATURE_AVX512BW, 142 FEATURE_AVX512DQ, 143 FEATURE_AVX512CD, 144 FEATURE_AVX512ER, 145 FEATURE_AVX512PF, 146 FEATURE_AVX512VBMI, 147 FEATURE_AVX512IFMA, 148 FEATURE_AVX5124VNNIW, 149 FEATURE_AVX5124FMAPS, 150 FEATURE_AVX512VPOPCNTDQ, 151 FEATURE_AVX512VBMI2, 152 FEATURE_GFNI, 153 FEATURE_VPCLMULQDQ, 154 FEATURE_AVX512VNNI, 155 FEATURE_AVX512BITALG, 156 FEATURE_AVX512BF16, 157 FEATURE_AVX512VP2INTERSECT, 158 CPU_FEATURE_MAX 159 }; 160 161 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 162 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 163 // support. Consequently, for i386, the presence of CPUID is checked first 164 // via the corresponding eflags bit. 165 static bool isCpuIdSupported(void) { 166 #if defined(__GNUC__) || defined(__clang__) 167 #if defined(__i386__) 168 int __cpuid_supported; 169 __asm__(" pushfl\n" 170 " popl %%eax\n" 171 " movl %%eax,%%ecx\n" 172 " xorl $0x00200000,%%eax\n" 173 " pushl %%eax\n" 174 " popfl\n" 175 " pushfl\n" 176 " popl %%eax\n" 177 " movl $0,%0\n" 178 " cmpl %%eax,%%ecx\n" 179 " je 1f\n" 180 " movl $1,%0\n" 181 "1:" 182 : "=r"(__cpuid_supported) 183 : 184 : "eax", "ecx"); 185 if (!__cpuid_supported) 186 return false; 187 #endif 188 return true; 189 #endif 190 return true; 191 } 192 193 // This code is copied from lib/Support/Host.cpp. 194 // Changes to either file should be mirrored in the other. 195 196 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 197 /// the specified arguments. If we can't run cpuid on the host, return true. 198 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 199 unsigned *rECX, unsigned *rEDX) { 200 #if defined(__GNUC__) || defined(__clang__) 201 #if defined(__x86_64__) 202 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 203 // FIXME: should we save this for Clang? 204 __asm__("movq\t%%rbx, %%rsi\n\t" 205 "cpuid\n\t" 206 "xchgq\t%%rbx, %%rsi\n\t" 207 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 208 : "a"(value)); 209 return false; 210 #elif defined(__i386__) 211 __asm__("movl\t%%ebx, %%esi\n\t" 212 "cpuid\n\t" 213 "xchgl\t%%ebx, %%esi\n\t" 214 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 215 : "a"(value)); 216 return false; 217 #else 218 return true; 219 #endif 220 #elif defined(_MSC_VER) 221 // The MSVC intrinsic is portable across x86 and x64. 222 int registers[4]; 223 __cpuid(registers, value); 224 *rEAX = registers[0]; 225 *rEBX = registers[1]; 226 *rECX = registers[2]; 227 *rEDX = registers[3]; 228 return false; 229 #else 230 return true; 231 #endif 232 } 233 234 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 235 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 236 /// return true. 237 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 238 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 239 unsigned *rEDX) { 240 #if defined(__GNUC__) || defined(__clang__) 241 #if defined(__x86_64__) 242 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 243 // FIXME: should we save this for Clang? 244 __asm__("movq\t%%rbx, %%rsi\n\t" 245 "cpuid\n\t" 246 "xchgq\t%%rbx, %%rsi\n\t" 247 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 248 : "a"(value), "c"(subleaf)); 249 return false; 250 #elif defined(__i386__) 251 __asm__("movl\t%%ebx, %%esi\n\t" 252 "cpuid\n\t" 253 "xchgl\t%%ebx, %%esi\n\t" 254 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 255 : "a"(value), "c"(subleaf)); 256 return false; 257 #else 258 return true; 259 #endif 260 #elif defined(_MSC_VER) 261 int registers[4]; 262 __cpuidex(registers, value, subleaf); 263 *rEAX = registers[0]; 264 *rEBX = registers[1]; 265 *rECX = registers[2]; 266 *rEDX = registers[3]; 267 return false; 268 #else 269 return true; 270 #endif 271 } 272 273 // Read control register 0 (XCR0). Used to detect features such as AVX. 274 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 275 #if defined(__GNUC__) || defined(__clang__) 276 // Check xgetbv; this uses a .byte sequence instead of the instruction 277 // directly because older assemblers do not include support for xgetbv and 278 // there is no easy way to conditionally compile based on the assembler used. 279 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 280 return false; 281 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 282 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 283 *rEAX = Result; 284 *rEDX = Result >> 32; 285 return false; 286 #else 287 return true; 288 #endif 289 } 290 291 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 292 unsigned *Model) { 293 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 294 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 295 if (*Family == 6 || *Family == 0xf) { 296 if (*Family == 0xf) 297 // Examine extended family ID if family ID is F. 298 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 299 // Examine extended model ID if family ID is 6 or F. 300 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 301 } 302 } 303 304 static const char * 305 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 306 const unsigned *Features, 307 unsigned *Type, unsigned *Subtype) { 308 #define testFeature(F) \ 309 (Features[F / 32] & (1 << (F % 32))) != 0 310 311 // We select CPU strings to match the code in Host.cpp, but we don't use them 312 // in compiler-rt. 313 const char *CPU = 0; 314 315 switch (Family) { 316 case 6: 317 switch (Model) { 318 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 319 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 320 // mobile processor, Intel Core 2 Extreme processor, Intel 321 // Pentium Dual-Core processor, Intel Xeon processor, model 322 // 0Fh. All processors are manufactured using the 65 nm process. 323 case 0x16: // Intel Celeron processor model 16h. All processors are 324 // manufactured using the 65 nm process 325 CPU = "core2"; 326 *Type = INTEL_CORE2; 327 break; 328 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 329 // 17h. All processors are manufactured using the 45 nm process. 330 // 331 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 332 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 333 // the 45 nm process. 334 CPU = "penryn"; 335 *Type = INTEL_CORE2; 336 break; 337 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 338 // processors are manufactured using the 45 nm process. 339 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 340 // As found in a Summer 2010 model iMac. 341 case 0x1f: 342 case 0x2e: // Nehalem EX 343 CPU = "nehalem"; 344 *Type = INTEL_COREI7; 345 *Subtype = INTEL_COREI7_NEHALEM; 346 break; 347 case 0x25: // Intel Core i7, laptop version. 348 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 349 // processors are manufactured using the 32 nm process. 350 case 0x2f: // Westmere EX 351 CPU = "westmere"; 352 *Type = INTEL_COREI7; 353 *Subtype = INTEL_COREI7_WESTMERE; 354 break; 355 case 0x2a: // Intel Core i7 processor. All processors are manufactured 356 // using the 32 nm process. 357 case 0x2d: 358 CPU = "sandybridge"; 359 *Type = INTEL_COREI7; 360 *Subtype = INTEL_COREI7_SANDYBRIDGE; 361 break; 362 case 0x3a: 363 case 0x3e: // Ivy Bridge EP 364 CPU = "ivybridge"; 365 *Type = INTEL_COREI7; 366 *Subtype = INTEL_COREI7_IVYBRIDGE; 367 break; 368 369 // Haswell: 370 case 0x3c: 371 case 0x3f: 372 case 0x45: 373 case 0x46: 374 CPU = "haswell"; 375 *Type = INTEL_COREI7; 376 *Subtype = INTEL_COREI7_HASWELL; 377 break; 378 379 // Broadwell: 380 case 0x3d: 381 case 0x47: 382 case 0x4f: 383 case 0x56: 384 CPU = "broadwell"; 385 *Type = INTEL_COREI7; 386 *Subtype = INTEL_COREI7_BROADWELL; 387 break; 388 389 // Skylake: 390 case 0x4e: // Skylake mobile 391 case 0x5e: // Skylake desktop 392 case 0x8e: // Kaby Lake mobile 393 case 0x9e: // Kaby Lake desktop 394 case 0xa5: // Comet Lake-H/S 395 case 0xa6: // Comet Lake-U 396 CPU = "skylake"; 397 *Type = INTEL_COREI7; 398 *Subtype = INTEL_COREI7_SKYLAKE; 399 break; 400 401 // Rocketlake: 402 case 0xa7: 403 CPU = "rocketlake"; 404 *Type = INTEL_COREI7; 405 *Subtype = INTEL_COREI7_ROCKETLAKE; 406 break; 407 408 // Skylake Xeon: 409 case 0x55: 410 *Type = INTEL_COREI7; 411 if (testFeature(FEATURE_AVX512BF16)) { 412 CPU = "cooperlake"; 413 *Subtype = INTEL_COREI7_COOPERLAKE; 414 } else if (testFeature(FEATURE_AVX512VNNI)) { 415 CPU = "cascadelake"; 416 *Subtype = INTEL_COREI7_CASCADELAKE; 417 } else { 418 CPU = "skylake-avx512"; 419 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; 420 } 421 break; 422 423 // Cannonlake: 424 case 0x66: 425 CPU = "cannonlake"; 426 *Type = INTEL_COREI7; 427 *Subtype = INTEL_COREI7_CANNONLAKE; 428 break; 429 430 // Icelake: 431 case 0x7d: 432 case 0x7e: 433 CPU = "icelake-client"; 434 *Type = INTEL_COREI7; 435 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; 436 break; 437 438 // Tigerlake: 439 case 0x8c: 440 case 0x8d: 441 CPU = "tigerlake"; 442 *Type = INTEL_COREI7; 443 *Subtype = INTEL_COREI7_TIGERLAKE; 444 break; 445 446 // Alderlake: 447 case 0x97: 448 case 0x9a: 449 // Raptorlake: 450 case 0xb7: 451 // Meteorlake: 452 case 0xaa: 453 case 0xac: 454 CPU = "alderlake"; 455 *Type = INTEL_COREI7; 456 *Subtype = INTEL_COREI7_ALDERLAKE; 457 break; 458 459 // Icelake Xeon: 460 case 0x6a: 461 case 0x6c: 462 CPU = "icelake-server"; 463 *Type = INTEL_COREI7; 464 *Subtype = INTEL_COREI7_ICELAKE_SERVER; 465 break; 466 467 // Emerald Rapids: 468 case 0xcf: 469 // Sapphire Rapids: 470 case 0x8f: 471 CPU = "sapphirerapids"; 472 *Type = INTEL_COREI7; 473 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; 474 break; 475 476 // Granite Rapids: 477 case 0xae: 478 case 0xad: 479 CPU = "graniterapids"; 480 *Type = INTEL_COREI7; 481 *Subtype = INTEL_COREI7_GRANITERAPIDS; 482 break; 483 484 case 0x1c: // Most 45 nm Intel Atom processors 485 case 0x26: // 45 nm Atom Lincroft 486 case 0x27: // 32 nm Atom Medfield 487 case 0x35: // 32 nm Atom Midview 488 case 0x36: // 32 nm Atom Midview 489 CPU = "bonnell"; 490 *Type = INTEL_BONNELL; 491 break; 492 493 // Atom Silvermont codes from the Intel software optimization guide. 494 case 0x37: 495 case 0x4a: 496 case 0x4d: 497 case 0x5a: 498 case 0x5d: 499 case 0x4c: // really airmont 500 CPU = "silvermont"; 501 *Type = INTEL_SILVERMONT; 502 break; 503 // Goldmont: 504 case 0x5c: // Apollo Lake 505 case 0x5f: // Denverton 506 CPU = "goldmont"; 507 *Type = INTEL_GOLDMONT; 508 break; // "goldmont" 509 case 0x7a: 510 CPU = "goldmont-plus"; 511 *Type = INTEL_GOLDMONT_PLUS; 512 break; 513 case 0x86: 514 CPU = "tremont"; 515 *Type = INTEL_TREMONT; 516 break; 517 518 // Sierraforest: 519 case 0xaf: 520 CPU = "sierraforest"; 521 *Type = INTEL_SIERRAFOREST; 522 break; 523 524 // Grandridge: 525 case 0xb6: 526 CPU = "grandridge"; 527 *Type = INTEL_GRANDRIDGE; 528 break; 529 530 case 0x57: 531 CPU = "knl"; 532 *Type = INTEL_KNL; 533 break; 534 535 case 0x85: 536 CPU = "knm"; 537 *Type = INTEL_KNM; 538 break; 539 540 default: // Unknown family 6 CPU. 541 break; 542 } 543 break; 544 default: 545 break; // Unknown. 546 } 547 548 return CPU; 549 } 550 551 static const char * 552 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 553 const unsigned *Features, 554 unsigned *Type, unsigned *Subtype) { 555 // We select CPU strings to match the code in Host.cpp, but we don't use them 556 // in compiler-rt. 557 const char *CPU = 0; 558 559 switch (Family) { 560 case 16: 561 CPU = "amdfam10"; 562 *Type = AMDFAM10H; 563 switch (Model) { 564 case 2: 565 *Subtype = AMDFAM10H_BARCELONA; 566 break; 567 case 4: 568 *Subtype = AMDFAM10H_SHANGHAI; 569 break; 570 case 8: 571 *Subtype = AMDFAM10H_ISTANBUL; 572 break; 573 } 574 break; 575 case 20: 576 CPU = "btver1"; 577 *Type = AMD_BTVER1; 578 break; 579 case 21: 580 CPU = "bdver1"; 581 *Type = AMDFAM15H; 582 if (Model >= 0x60 && Model <= 0x7f) { 583 CPU = "bdver4"; 584 *Subtype = AMDFAM15H_BDVER4; 585 break; // 60h-7Fh: Excavator 586 } 587 if (Model >= 0x30 && Model <= 0x3f) { 588 CPU = "bdver3"; 589 *Subtype = AMDFAM15H_BDVER3; 590 break; // 30h-3Fh: Steamroller 591 } 592 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 593 CPU = "bdver2"; 594 *Subtype = AMDFAM15H_BDVER2; 595 break; // 02h, 10h-1Fh: Piledriver 596 } 597 if (Model <= 0x0f) { 598 *Subtype = AMDFAM15H_BDVER1; 599 break; // 00h-0Fh: Bulldozer 600 } 601 break; 602 case 22: 603 CPU = "btver2"; 604 *Type = AMD_BTVER2; 605 break; 606 case 23: 607 CPU = "znver1"; 608 *Type = AMDFAM17H; 609 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 610 CPU = "znver2"; 611 *Subtype = AMDFAM17H_ZNVER2; 612 break; // 30h-3fh, 71h: Zen2 613 } 614 if (Model <= 0x0f) { 615 *Subtype = AMDFAM17H_ZNVER1; 616 break; // 00h-0Fh: Zen1 617 } 618 break; 619 case 25: 620 CPU = "znver3"; 621 *Type = AMDFAM19H; 622 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) { 623 // Family 19h Models 00h-0Fh - Zen3 624 // Family 19h Models 20h-2Fh - Zen3 625 // Family 19h Models 30h-3Fh - Zen3 626 // Family 19h Models 40h-4Fh - Zen3+ 627 // Family 19h Models 50h-5Fh - Zen3+ 628 *Subtype = AMDFAM19H_ZNVER3; 629 break; 630 } 631 if ((Model >= 0x10 && Model <= 0x1f) || 632 (Model >= 0x60 && Model <= 0x74) || 633 (Model >= 0x78 && Model <= 0x7b) || 634 (Model >= 0xA0 && Model <= 0xAf)) { 635 CPU = "znver4"; 636 *Subtype = AMDFAM19H_ZNVER4; 637 break; // "znver4" 638 } 639 break; 640 default: 641 break; // Unknown AMD CPU. 642 } 643 644 return CPU; 645 } 646 647 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 648 unsigned *Features) { 649 unsigned EAX, EBX; 650 651 #define setFeature(F) \ 652 Features[F / 32] |= 1U << (F % 32) 653 654 if ((EDX >> 15) & 1) 655 setFeature(FEATURE_CMOV); 656 if ((EDX >> 23) & 1) 657 setFeature(FEATURE_MMX); 658 if ((EDX >> 25) & 1) 659 setFeature(FEATURE_SSE); 660 if ((EDX >> 26) & 1) 661 setFeature(FEATURE_SSE2); 662 663 if ((ECX >> 0) & 1) 664 setFeature(FEATURE_SSE3); 665 if ((ECX >> 1) & 1) 666 setFeature(FEATURE_PCLMUL); 667 if ((ECX >> 9) & 1) 668 setFeature(FEATURE_SSSE3); 669 if ((ECX >> 12) & 1) 670 setFeature(FEATURE_FMA); 671 if ((ECX >> 19) & 1) 672 setFeature(FEATURE_SSE4_1); 673 if ((ECX >> 20) & 1) 674 setFeature(FEATURE_SSE4_2); 675 if ((ECX >> 23) & 1) 676 setFeature(FEATURE_POPCNT); 677 if ((ECX >> 25) & 1) 678 setFeature(FEATURE_AES); 679 680 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 681 // indicates that the AVX registers will be saved and restored on context 682 // switch, then we have full AVX support. 683 const unsigned AVXBits = (1 << 27) | (1 << 28); 684 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 685 ((EAX & 0x6) == 0x6); 686 #if defined(__APPLE__) 687 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 688 // save the AVX512 context if we use AVX512 instructions, even the bit is not 689 // set right now. 690 bool HasAVX512Save = true; 691 #else 692 // AVX512 requires additional context to be saved by the OS. 693 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 694 #endif 695 696 if (HasAVX) 697 setFeature(FEATURE_AVX); 698 699 bool HasLeaf7 = 700 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 701 702 if (HasLeaf7 && ((EBX >> 3) & 1)) 703 setFeature(FEATURE_BMI); 704 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 705 setFeature(FEATURE_AVX2); 706 if (HasLeaf7 && ((EBX >> 8) & 1)) 707 setFeature(FEATURE_BMI2); 708 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 709 setFeature(FEATURE_AVX512F); 710 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 711 setFeature(FEATURE_AVX512DQ); 712 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 713 setFeature(FEATURE_AVX512IFMA); 714 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 715 setFeature(FEATURE_AVX512PF); 716 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 717 setFeature(FEATURE_AVX512ER); 718 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 719 setFeature(FEATURE_AVX512CD); 720 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 721 setFeature(FEATURE_AVX512BW); 722 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 723 setFeature(FEATURE_AVX512VL); 724 725 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 726 setFeature(FEATURE_AVX512VBMI); 727 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 728 setFeature(FEATURE_AVX512VBMI2); 729 if (HasLeaf7 && ((ECX >> 8) & 1)) 730 setFeature(FEATURE_GFNI); 731 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 732 setFeature(FEATURE_VPCLMULQDQ); 733 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 734 setFeature(FEATURE_AVX512VNNI); 735 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 736 setFeature(FEATURE_AVX512BITALG); 737 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 738 setFeature(FEATURE_AVX512VPOPCNTDQ); 739 740 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 741 setFeature(FEATURE_AVX5124VNNIW); 742 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 743 setFeature(FEATURE_AVX5124FMAPS); 744 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 745 setFeature(FEATURE_AVX512VP2INTERSECT); 746 747 bool HasLeaf7Subleaf1 = 748 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 749 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 750 setFeature(FEATURE_AVX512BF16); 751 752 unsigned MaxExtLevel; 753 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 754 755 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 756 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 757 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 758 setFeature(FEATURE_SSE4_A); 759 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 760 setFeature(FEATURE_XOP); 761 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 762 setFeature(FEATURE_FMA4); 763 #undef setFeature 764 } 765 766 #ifndef _WIN32 767 __attribute__((visibility("hidden"))) 768 #endif 769 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; 770 771 #ifndef _WIN32 772 __attribute__((visibility("hidden"))) 773 #endif 774 struct __processor_model { 775 unsigned int __cpu_vendor; 776 unsigned int __cpu_type; 777 unsigned int __cpu_subtype; 778 unsigned int __cpu_features[1]; 779 } __cpu_model = {0, 0, 0, {0}}; 780 781 #ifndef _WIN32 782 __attribute__((visibility("hidden"))) 783 #endif 784 unsigned int __cpu_features2 = 0; 785 786 // A constructor function that is sets __cpu_model and __cpu_features2 with 787 // the right values. This needs to run only once. This constructor is 788 // given the highest priority and it should run before constructors without 789 // the priority set. However, it still runs after ifunc initializers and 790 // needs to be called explicitly there. 791 792 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { 793 unsigned EAX, EBX, ECX, EDX; 794 unsigned MaxLeaf = 5; 795 unsigned Vendor; 796 unsigned Model, Family; 797 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; 798 799 // This function needs to run just once. 800 if (__cpu_model.__cpu_vendor) 801 return 0; 802 803 if (!isCpuIdSupported() || 804 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { 805 __cpu_model.__cpu_vendor = VENDOR_OTHER; 806 return -1; 807 } 808 809 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 810 detectX86FamilyModel(EAX, &Family, &Model); 811 812 // Find available features. 813 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); 814 815 assert((sizeof(Features)/sizeof(Features[0])) == 2); 816 __cpu_model.__cpu_features[0] = Features[0]; 817 __cpu_features2 = Features[1]; 818 819 if (Vendor == SIG_INTEL) { 820 // Get CPU type. 821 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], 822 &(__cpu_model.__cpu_type), 823 &(__cpu_model.__cpu_subtype)); 824 __cpu_model.__cpu_vendor = VENDOR_INTEL; 825 } else if (Vendor == SIG_AMD) { 826 // Get CPU type. 827 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], 828 &(__cpu_model.__cpu_type), 829 &(__cpu_model.__cpu_subtype)); 830 __cpu_model.__cpu_vendor = VENDOR_AMD; 831 } else 832 __cpu_model.__cpu_vendor = VENDOR_OTHER; 833 834 assert(__cpu_model.__cpu_vendor < VENDOR_MAX); 835 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); 836 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); 837 838 return 0; 839 } 840 #elif defined(__aarch64__) 841 842 #ifndef AT_HWCAP 843 #define AT_HWCAP 16 844 #endif 845 #ifndef HWCAP_CPUID 846 #define HWCAP_CPUID (1 << 11) 847 #endif 848 #ifndef HWCAP_FP 849 #define HWCAP_FP (1 << 0) 850 #endif 851 #ifndef HWCAP_ASIMD 852 #define HWCAP_ASIMD (1 << 1) 853 #endif 854 #ifndef HWCAP_AES 855 #define HWCAP_AES (1 << 3) 856 #endif 857 #ifndef HWCAP_PMULL 858 #define HWCAP_PMULL (1 << 4) 859 #endif 860 #ifndef HWCAP_SHA1 861 #define HWCAP_SHA1 (1 << 5) 862 #endif 863 #ifndef HWCAP_SHA2 864 #define HWCAP_SHA2 (1 << 6) 865 #endif 866 #ifndef HWCAP_ATOMICS 867 #define HWCAP_ATOMICS (1 << 8) 868 #endif 869 #ifndef HWCAP_FPHP 870 #define HWCAP_FPHP (1 << 9) 871 #endif 872 #ifndef HWCAP_ASIMDHP 873 #define HWCAP_ASIMDHP (1 << 10) 874 #endif 875 #ifndef HWCAP_ASIMDRDM 876 #define HWCAP_ASIMDRDM (1 << 12) 877 #endif 878 #ifndef HWCAP_JSCVT 879 #define HWCAP_JSCVT (1 << 13) 880 #endif 881 #ifndef HWCAP_FCMA 882 #define HWCAP_FCMA (1 << 14) 883 #endif 884 #ifndef HWCAP_LRCPC 885 #define HWCAP_LRCPC (1 << 15) 886 #endif 887 #ifndef HWCAP_DCPOP 888 #define HWCAP_DCPOP (1 << 16) 889 #endif 890 #ifndef HWCAP_SHA3 891 #define HWCAP_SHA3 (1 << 17) 892 #endif 893 #ifndef HWCAP_SM3 894 #define HWCAP_SM3 (1 << 18) 895 #endif 896 #ifndef HWCAP_SM4 897 #define HWCAP_SM4 (1 << 19) 898 #endif 899 #ifndef HWCAP_ASIMDDP 900 #define HWCAP_ASIMDDP (1 << 20) 901 #endif 902 #ifndef HWCAP_SHA512 903 #define HWCAP_SHA512 (1 << 21) 904 #endif 905 #ifndef HWCAP_SVE 906 #define HWCAP_SVE (1 << 22) 907 #endif 908 #ifndef HWCAP_ASIMDFHM 909 #define HWCAP_ASIMDFHM (1 << 23) 910 #endif 911 #ifndef HWCAP_DIT 912 #define HWCAP_DIT (1 << 24) 913 #endif 914 #ifndef HWCAP_ILRCPC 915 #define HWCAP_ILRCPC (1 << 26) 916 #endif 917 #ifndef HWCAP_FLAGM 918 #define HWCAP_FLAGM (1 << 27) 919 #endif 920 #ifndef HWCAP_SSBS 921 #define HWCAP_SSBS (1 << 28) 922 #endif 923 #ifndef HWCAP_SB 924 #define HWCAP_SB (1 << 29) 925 #endif 926 927 #ifndef AT_HWCAP2 928 #define AT_HWCAP2 26 929 #endif 930 #ifndef HWCAP2_DCPODP 931 #define HWCAP2_DCPODP (1 << 0) 932 #endif 933 #ifndef HWCAP2_SVE2 934 #define HWCAP2_SVE2 (1 << 1) 935 #endif 936 #ifndef HWCAP2_SVEAES 937 #define HWCAP2_SVEAES (1 << 2) 938 #endif 939 #ifndef HWCAP2_SVEPMULL 940 #define HWCAP2_SVEPMULL (1 << 3) 941 #endif 942 #ifndef HWCAP2_SVEBITPERM 943 #define HWCAP2_SVEBITPERM (1 << 4) 944 #endif 945 #ifndef HWCAP2_SVESHA3 946 #define HWCAP2_SVESHA3 (1 << 5) 947 #endif 948 #ifndef HWCAP2_SVESM4 949 #define HWCAP2_SVESM4 (1 << 6) 950 #endif 951 #ifndef HWCAP2_FLAGM2 952 #define HWCAP2_FLAGM2 (1 << 7) 953 #endif 954 #ifndef HWCAP2_FRINT 955 #define HWCAP2_FRINT (1 << 8) 956 #endif 957 #ifndef HWCAP2_SVEI8MM 958 #define HWCAP2_SVEI8MM (1 << 9) 959 #endif 960 #ifndef HWCAP2_SVEF32MM 961 #define HWCAP2_SVEF32MM (1 << 10) 962 #endif 963 #ifndef HWCAP2_SVEF64MM 964 #define HWCAP2_SVEF64MM (1 << 11) 965 #endif 966 #ifndef HWCAP2_SVEBF16 967 #define HWCAP2_SVEBF16 (1 << 12) 968 #endif 969 #ifndef HWCAP2_I8MM 970 #define HWCAP2_I8MM (1 << 13) 971 #endif 972 #ifndef HWCAP2_BF16 973 #define HWCAP2_BF16 (1 << 14) 974 #endif 975 #ifndef HWCAP2_DGH 976 #define HWCAP2_DGH (1 << 15) 977 #endif 978 #ifndef HWCAP2_RNG 979 #define HWCAP2_RNG (1 << 16) 980 #endif 981 #ifndef HWCAP2_BTI 982 #define HWCAP2_BTI (1 << 17) 983 #endif 984 #ifndef HWCAP2_MTE 985 #define HWCAP2_MTE (1 << 18) 986 #endif 987 #ifndef HWCAP2_RPRES 988 #define HWCAP2_RPRES (1 << 21) 989 #endif 990 #ifndef HWCAP2_MTE3 991 #define HWCAP2_MTE3 (1 << 22) 992 #endif 993 #ifndef HWCAP2_SME 994 #define HWCAP2_SME (1 << 23) 995 #endif 996 #ifndef HWCAP2_SME_I16I64 997 #define HWCAP2_SME_I16I64 (1 << 24) 998 #endif 999 #ifndef HWCAP2_SME_F64F64 1000 #define HWCAP2_SME_F64F64 (1 << 25) 1001 #endif 1002 #ifndef HWCAP2_WFXT 1003 #define HWCAP2_WFXT (1UL << 31) 1004 #endif 1005 #ifndef HWCAP2_EBF16 1006 #define HWCAP2_EBF16 (1UL << 32) 1007 #endif 1008 #ifndef HWCAP2_SVE_EBF16 1009 #define HWCAP2_SVE_EBF16 (1UL << 33) 1010 #endif 1011 1012 // LSE support detection for out-of-line atomics 1013 // using HWCAP and Auxiliary vector 1014 _Bool __aarch64_have_lse_atomics 1015 __attribute__((visibility("hidden"), nocommon)); 1016 1017 #if defined(__has_include) 1018 #if __has_include(<sys/auxv.h>) 1019 #include <sys/auxv.h> 1020 #if __has_include(<asm/hwcap.h>) 1021 #include <asm/hwcap.h> 1022 1023 #if defined(__ANDROID__) 1024 #include <string.h> 1025 #include <sys/system_properties.h> 1026 #elif defined(__Fuchsia__) 1027 #include <zircon/features.h> 1028 #include <zircon/syscalls.h> 1029 #endif 1030 1031 // Detect Exynos 9810 CPU 1032 #define IF_EXYNOS9810 \ 1033 char arch[PROP_VALUE_MAX]; \ 1034 if (__system_property_get("ro.arch", arch) > 0 && \ 1035 strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) 1036 1037 static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) { 1038 #if defined(__FreeBSD__) 1039 unsigned long hwcap; 1040 int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); 1041 __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0; 1042 #elif defined(__Fuchsia__) 1043 // This ensures the vDSO is a direct link-time dependency of anything that 1044 // needs this initializer code. 1045 #pragma comment(lib, "zircon") 1046 uint32_t features; 1047 zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); 1048 __aarch64_have_lse_atomics = 1049 status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0; 1050 #else 1051 unsigned long hwcap = getauxval(AT_HWCAP); 1052 _Bool result = (hwcap & HWCAP_ATOMICS) != 0; 1053 #if defined(__ANDROID__) 1054 if (result) { 1055 // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; 1056 // only the former support LSE atomics. However, the kernel in the 1057 // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly 1058 // reported the feature as being supported. 1059 // 1060 // The kernel appears to have been corrected to mark it unsupported as of 1061 // the Android 9.0 release on those devices, and this issue has not been 1062 // observed anywhere else. Thus, this workaround may be removed if 1063 // compiler-rt ever drops support for Android 8.0. 1064 IF_EXYNOS9810 result = false; 1065 } 1066 #endif // defined(__ANDROID__) 1067 __aarch64_have_lse_atomics = result; 1068 #endif // defined(__FreeBSD__) 1069 } 1070 1071 #if !defined(DISABLE_AARCH64_FMV) 1072 // CPUFeatures must correspond to the same AArch64 features in 1073 // AArch64TargetParser.h 1074 enum CPUFeatures { 1075 FEAT_RNG, 1076 FEAT_FLAGM, 1077 FEAT_FLAGM2, 1078 FEAT_FP16FML, 1079 FEAT_DOTPROD, 1080 FEAT_SM4, 1081 FEAT_RDM, 1082 FEAT_LSE, 1083 FEAT_FP, 1084 FEAT_SIMD, 1085 FEAT_CRC, 1086 FEAT_SHA1, 1087 FEAT_SHA2, 1088 FEAT_SHA3, 1089 FEAT_AES, 1090 FEAT_PMULL, 1091 FEAT_FP16, 1092 FEAT_DIT, 1093 FEAT_DPB, 1094 FEAT_DPB2, 1095 FEAT_JSCVT, 1096 FEAT_FCMA, 1097 FEAT_RCPC, 1098 FEAT_RCPC2, 1099 FEAT_FRINTTS, 1100 FEAT_DGH, 1101 FEAT_I8MM, 1102 FEAT_BF16, 1103 FEAT_EBF16, 1104 FEAT_RPRES, 1105 FEAT_SVE, 1106 FEAT_SVE_BF16, 1107 FEAT_SVE_EBF16, 1108 FEAT_SVE_I8MM, 1109 FEAT_SVE_F32MM, 1110 FEAT_SVE_F64MM, 1111 FEAT_SVE2, 1112 FEAT_SVE_AES, 1113 FEAT_SVE_PMULL128, 1114 FEAT_SVE_BITPERM, 1115 FEAT_SVE_SHA3, 1116 FEAT_SVE_SM4, 1117 FEAT_SME, 1118 FEAT_MEMTAG, 1119 FEAT_MEMTAG2, 1120 FEAT_MEMTAG3, 1121 FEAT_SB, 1122 FEAT_PREDRES, 1123 FEAT_SSBS, 1124 FEAT_SSBS2, 1125 FEAT_BTI, 1126 FEAT_LS64, 1127 FEAT_LS64_V, 1128 FEAT_LS64_ACCDATA, 1129 FEAT_WFXT, 1130 FEAT_SME_F64, 1131 FEAT_SME_I64, 1132 FEAT_SME2, 1133 FEAT_MAX 1134 }; 1135 1136 // Architecture features used 1137 // in Function Multi Versioning 1138 struct { 1139 unsigned long long features; 1140 // As features grows new fields could be added 1141 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon)); 1142 1143 void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) { 1144 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F 1145 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) 1146 #define extractBits(val, start, number) \ 1147 (val & ((1ULL << number) - 1ULL) << start) >> start 1148 if (hwcap & HWCAP_CRC32) 1149 setCPUFeature(FEAT_CRC); 1150 if (hwcap & HWCAP_PMULL) 1151 setCPUFeature(FEAT_PMULL); 1152 if (hwcap & HWCAP_FLAGM) 1153 setCPUFeature(FEAT_FLAGM); 1154 if (hwcap2 & HWCAP2_FLAGM2) { 1155 setCPUFeature(FEAT_FLAGM); 1156 setCPUFeature(FEAT_FLAGM2); 1157 } 1158 if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) 1159 setCPUFeature(FEAT_SM4); 1160 if (hwcap & HWCAP_ASIMDDP) 1161 setCPUFeature(FEAT_DOTPROD); 1162 if (hwcap & HWCAP_ASIMDFHM) 1163 setCPUFeature(FEAT_FP16FML); 1164 if (hwcap & HWCAP_FPHP) { 1165 setCPUFeature(FEAT_FP16); 1166 setCPUFeature(FEAT_FP); 1167 } 1168 if (hwcap & HWCAP_DIT) 1169 setCPUFeature(FEAT_DIT); 1170 if (hwcap & HWCAP_ASIMDRDM) 1171 setCPUFeature(FEAT_RDM); 1172 if (hwcap & HWCAP_ILRCPC) 1173 setCPUFeature(FEAT_RCPC2); 1174 if (hwcap & HWCAP_AES) 1175 setCPUFeature(FEAT_AES); 1176 if (hwcap & HWCAP_SHA1) 1177 setCPUFeature(FEAT_SHA1); 1178 if (hwcap & HWCAP_SHA2) 1179 setCPUFeature(FEAT_SHA2); 1180 if (hwcap & HWCAP_JSCVT) 1181 setCPUFeature(FEAT_JSCVT); 1182 if (hwcap & HWCAP_FCMA) 1183 setCPUFeature(FEAT_FCMA); 1184 if (hwcap & HWCAP_SB) 1185 setCPUFeature(FEAT_SB); 1186 if (hwcap & HWCAP_SSBS) 1187 setCPUFeature(FEAT_SSBS2); 1188 if (hwcap2 & HWCAP2_MTE) { 1189 setCPUFeature(FEAT_MEMTAG); 1190 setCPUFeature(FEAT_MEMTAG2); 1191 } 1192 if (hwcap2 & HWCAP2_MTE3) { 1193 setCPUFeature(FEAT_MEMTAG); 1194 setCPUFeature(FEAT_MEMTAG2); 1195 setCPUFeature(FEAT_MEMTAG3); 1196 } 1197 if (hwcap2 & HWCAP2_SVEAES) 1198 setCPUFeature(FEAT_SVE_AES); 1199 if (hwcap2 & HWCAP2_SVEPMULL) { 1200 setCPUFeature(FEAT_SVE_AES); 1201 setCPUFeature(FEAT_SVE_PMULL128); 1202 } 1203 if (hwcap2 & HWCAP2_SVEBITPERM) 1204 setCPUFeature(FEAT_SVE_BITPERM); 1205 if (hwcap2 & HWCAP2_SVESHA3) 1206 setCPUFeature(FEAT_SVE_SHA3); 1207 if (hwcap2 & HWCAP2_SVESM4) 1208 setCPUFeature(FEAT_SVE_SM4); 1209 if (hwcap2 & HWCAP2_DCPODP) 1210 setCPUFeature(FEAT_DPB2); 1211 if (hwcap & HWCAP_ATOMICS) 1212 setCPUFeature(FEAT_LSE); 1213 if (hwcap2 & HWCAP2_RNG) 1214 setCPUFeature(FEAT_RNG); 1215 if (hwcap2 & HWCAP2_I8MM) 1216 setCPUFeature(FEAT_I8MM); 1217 if (hwcap2 & HWCAP2_EBF16) 1218 setCPUFeature(FEAT_EBF16); 1219 if (hwcap2 & HWCAP2_SVE_EBF16) 1220 setCPUFeature(FEAT_SVE_EBF16); 1221 if (hwcap2 & HWCAP2_DGH) 1222 setCPUFeature(FEAT_DGH); 1223 if (hwcap2 & HWCAP2_FRINT) 1224 setCPUFeature(FEAT_FRINTTS); 1225 if (hwcap2 & HWCAP2_SVEI8MM) 1226 setCPUFeature(FEAT_SVE_I8MM); 1227 if (hwcap2 & HWCAP2_SVEF32MM) 1228 setCPUFeature(FEAT_SVE_F32MM); 1229 if (hwcap2 & HWCAP2_SVEF64MM) 1230 setCPUFeature(FEAT_SVE_F64MM); 1231 if (hwcap2 & HWCAP2_BTI) 1232 setCPUFeature(FEAT_BTI); 1233 if (hwcap2 & HWCAP2_RPRES) 1234 setCPUFeature(FEAT_RPRES); 1235 if (hwcap2 & HWCAP2_WFXT) 1236 setCPUFeature(FEAT_WFXT); 1237 if (hwcap2 & HWCAP2_SME) 1238 setCPUFeature(FEAT_SME); 1239 if (hwcap2 & HWCAP2_SME_I16I64) 1240 setCPUFeature(FEAT_SME_I64); 1241 if (hwcap2 & HWCAP2_SME_F64F64) 1242 setCPUFeature(FEAT_SME_F64); 1243 if (hwcap & HWCAP_CPUID) { 1244 unsigned long ftr; 1245 getCPUFeature(ID_AA64PFR1_EL1, ftr); 1246 // ID_AA64PFR1_EL1.MTE >= 0b0001 1247 if (extractBits(ftr, 8, 4) >= 0x1) 1248 setCPUFeature(FEAT_MEMTAG); 1249 // ID_AA64PFR1_EL1.SSBS == 0b0001 1250 if (extractBits(ftr, 4, 4) == 0x1) 1251 setCPUFeature(FEAT_SSBS); 1252 // ID_AA64PFR1_EL1.SME == 0b0010 1253 if (extractBits(ftr, 24, 4) == 0x2) 1254 setCPUFeature(FEAT_SME2); 1255 getCPUFeature(ID_AA64PFR0_EL1, ftr); 1256 // ID_AA64PFR0_EL1.FP != 0b1111 1257 if (extractBits(ftr, 16, 4) != 0xF) { 1258 setCPUFeature(FEAT_FP); 1259 // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP 1260 setCPUFeature(FEAT_SIMD); 1261 } 1262 // ID_AA64PFR0_EL1.SVE != 0b0000 1263 if (extractBits(ftr, 32, 4) != 0x0) { 1264 // get ID_AA64ZFR0_EL1, that name supported 1265 // if sve enabled only 1266 getCPUFeature(S3_0_C0_C4_4, ftr); 1267 // ID_AA64ZFR0_EL1.SVEver == 0b0000 1268 if (extractBits(ftr, 0, 4) == 0x0) 1269 setCPUFeature(FEAT_SVE); 1270 // ID_AA64ZFR0_EL1.SVEver == 0b0001 1271 if (extractBits(ftr, 0, 4) == 0x1) 1272 setCPUFeature(FEAT_SVE2); 1273 // ID_AA64ZFR0_EL1.BF16 != 0b0000 1274 if (extractBits(ftr, 20, 4) != 0x0) 1275 setCPUFeature(FEAT_SVE_BF16); 1276 } 1277 getCPUFeature(ID_AA64ISAR0_EL1, ftr); 1278 // ID_AA64ISAR0_EL1.SHA3 != 0b0000 1279 if (extractBits(ftr, 32, 4) != 0x0) 1280 setCPUFeature(FEAT_SHA3); 1281 getCPUFeature(ID_AA64ISAR1_EL1, ftr); 1282 // ID_AA64ISAR1_EL1.DPB >= 0b0001 1283 if (extractBits(ftr, 0, 4) >= 0x1) 1284 setCPUFeature(FEAT_DPB); 1285 // ID_AA64ISAR1_EL1.LRCPC != 0b0000 1286 if (extractBits(ftr, 20, 4) != 0x0) 1287 setCPUFeature(FEAT_RCPC); 1288 // ID_AA64ISAR1_EL1.SPECRES == 0b0001 1289 if (extractBits(ftr, 40, 4) == 0x2) 1290 setCPUFeature(FEAT_PREDRES); 1291 // ID_AA64ISAR1_EL1.BF16 != 0b0000 1292 if (extractBits(ftr, 44, 4) != 0x0) 1293 setCPUFeature(FEAT_BF16); 1294 // ID_AA64ISAR1_EL1.LS64 >= 0b0001 1295 if (extractBits(ftr, 60, 4) >= 0x1) 1296 setCPUFeature(FEAT_LS64); 1297 // ID_AA64ISAR1_EL1.LS64 >= 0b0010 1298 if (extractBits(ftr, 60, 4) >= 0x2) 1299 setCPUFeature(FEAT_LS64_V); 1300 // ID_AA64ISAR1_EL1.LS64 >= 0b0011 1301 if (extractBits(ftr, 60, 4) >= 0x3) 1302 setCPUFeature(FEAT_LS64_ACCDATA); 1303 } else { 1304 // Set some features in case of no CPUID support 1305 if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { 1306 setCPUFeature(FEAT_FP); 1307 // FP and AdvSIMD fields have the same value 1308 setCPUFeature(FEAT_SIMD); 1309 } 1310 if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) 1311 setCPUFeature(FEAT_DPB); 1312 if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) 1313 setCPUFeature(FEAT_RCPC); 1314 if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) 1315 setCPUFeature(FEAT_BF16); 1316 if (hwcap2 & HWCAP2_SVEBF16) 1317 setCPUFeature(FEAT_SVE_BF16); 1318 if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) 1319 setCPUFeature(FEAT_SVE2); 1320 if (hwcap & HWCAP_SHA3) 1321 setCPUFeature(FEAT_SHA3); 1322 } 1323 } 1324 1325 void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) { 1326 unsigned long hwcap; 1327 unsigned long hwcap2; 1328 // CPU features already initialized. 1329 if (__aarch64_cpu_features.features) 1330 return; 1331 setCPUFeature(FEAT_MAX); 1332 #if defined(__FreeBSD__) 1333 int res = 0; 1334 res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); 1335 res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2); 1336 if (res) 1337 return; 1338 #else 1339 #if defined(__ANDROID__) 1340 // Don't set any CPU features, 1341 // detection could be wrong on Exynos 9810. 1342 IF_EXYNOS9810 return; 1343 #endif // defined(__ANDROID__) 1344 hwcap = getauxval(AT_HWCAP); 1345 hwcap2 = getauxval(AT_HWCAP2); 1346 #endif // defined(__FreeBSD__) 1347 init_cpu_features_resolver(hwcap, hwcap2); 1348 #undef extractBits 1349 #undef getCPUFeature 1350 #undef setCPUFeature 1351 #undef IF_EXYNOS9810 1352 } 1353 #endif // !defined(DISABLE_AARCH64_FMV) 1354 #endif // defined(__has_include) 1355 #endif // __has_include(<sys/auxv.h>) 1356 #endif // __has_include(<asm/hwcap.h>) 1357 #endif // defined(__aarch64__) 1358