//===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the operating system Host detection.
//
//===----------------------------------------------------------------------===//

#include "llvm/TargetParser/Host.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <string.h>

// Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX 27 #include "Unix/Host.inc" 28 #include <sched.h> 29 #endif 30 #ifdef _WIN32 31 #include "Windows/Host.inc" 32 #endif 33 #ifdef _MSC_VER 34 #include <intrin.h> 35 #endif 36 #ifdef __MVS__ 37 #include "llvm/Support/BCD.h" 38 #endif 39 #if defined(__APPLE__) 40 #include <mach/host_info.h> 41 #include <mach/mach.h> 42 #include <mach/mach_host.h> 43 #include <mach/machine.h> 44 #include <sys/param.h> 45 #include <sys/sysctl.h> 46 #endif 47 #ifdef _AIX 48 #include <sys/systemcfg.h> 49 #endif 50 #if defined(__sun__) && defined(__svr4__) 51 #include <kstat.h> 52 #endif 53 54 #define DEBUG_TYPE "host-detection" 55 56 //===----------------------------------------------------------------------===// 57 // 58 // Implementations of the CPU detection routines 59 // 60 //===----------------------------------------------------------------------===// 61 62 using namespace llvm; 63 64 static std::unique_ptr<llvm::MemoryBuffer> 65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 68 if (std::error_code EC = Text.getError()) { 69 llvm::errs() << "Can't read " 70 << "/proc/cpuinfo: " << EC.message() << "\n"; 71 return nullptr; 72 } 73 return std::move(*Text); 74 } 75 76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 77 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 78 // and so we must use an operating-system interface to determine the current 79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 80 const char *generic = "generic"; 81 82 // The cpu line is second (after the 'processor: 0' line), so if this 83 // buffer is too small then something has changed (or is wrong). 
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 86 87 StringRef::const_iterator CIP = CPUInfoStart; 88 89 StringRef::const_iterator CPUStart = nullptr; 90 size_t CPULen = 0; 91 92 // We need to find the first line which starts with cpu, spaces, and a colon. 93 // After the colon, there may be some additional spaces and then the cpu type. 94 while (CIP < CPUInfoEnd && CPUStart == nullptr) { 95 if (CIP < CPUInfoEnd && *CIP == '\n') 96 ++CIP; 97 98 if (CIP < CPUInfoEnd && *CIP == 'c') { 99 ++CIP; 100 if (CIP < CPUInfoEnd && *CIP == 'p') { 101 ++CIP; 102 if (CIP < CPUInfoEnd && *CIP == 'u') { 103 ++CIP; 104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 105 ++CIP; 106 107 if (CIP < CPUInfoEnd && *CIP == ':') { 108 ++CIP; 109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 110 ++CIP; 111 112 if (CIP < CPUInfoEnd) { 113 CPUStart = CIP; 114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 115 *CIP != ',' && *CIP != '\n')) 116 ++CIP; 117 CPULen = CIP - CPUStart; 118 } 119 } 120 } 121 } 122 } 123 124 if (CPUStart == nullptr) 125 while (CIP < CPUInfoEnd && *CIP != '\n') 126 ++CIP; 127 } 128 129 if (CPUStart == nullptr) 130 return generic; 131 132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 133 .Case("604e", "604e") 134 .Case("604", "604") 135 .Case("7400", "7400") 136 .Case("7410", "7400") 137 .Case("7447", "7400") 138 .Case("7455", "7450") 139 .Case("G4", "g4") 140 .Case("POWER4", "970") 141 .Case("PPC970FX", "970") 142 .Case("PPC970MP", "970") 143 .Case("G5", "g5") 144 .Case("POWER5", "g5") 145 .Case("A2", "a2") 146 .Case("POWER6", "pwr6") 147 .Case("POWER7", "pwr7") 148 .Case("POWER8", "pwr8") 149 .Case("POWER8E", "pwr8") 150 .Case("POWER8NVL", "pwr8") 151 .Case("POWER9", "pwr9") 152 .Case("POWER10", "pwr10") 153 // FIXME: If we get a simulator or machine with the capabilities of 154 // mcpu=future, we should revisit this and add 
the name reported by the 155 // simulator/machine. 156 .Default(generic); 157 } 158 159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 160 // The cpuid register on arm is not accessible from user space. On Linux, 161 // it is exposed through the /proc/cpuinfo file. 162 163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 164 // in all cases. 165 SmallVector<StringRef, 32> Lines; 166 ProcCpuinfoContent.split(Lines, "\n"); 167 168 // Look for the CPU implementer line. 169 StringRef Implementer; 170 StringRef Hardware; 171 StringRef Part; 172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 173 if (Lines[I].starts_with("CPU implementer")) 174 Implementer = Lines[I].substr(15).ltrim("\t :"); 175 if (Lines[I].starts_with("Hardware")) 176 Hardware = Lines[I].substr(8).ltrim("\t :"); 177 if (Lines[I].starts_with("CPU part")) 178 Part = Lines[I].substr(8).ltrim("\t :"); 179 } 180 181 if (Implementer == "0x41") { // ARM Ltd. 182 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 184 if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996")) 185 return "cortex-a53"; 186 187 188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 189 // values correspond to the "Part number" in the CP15/c0 register. The 190 // contents are specified in the various processor manuals. 191 // This corresponds to the Main ID Register in Technical Reference Manuals. 
192 // and is used in programs like sys-utils 193 return StringSwitch<const char *>(Part) 194 .Case("0x926", "arm926ej-s") 195 .Case("0xb02", "mpcore") 196 .Case("0xb36", "arm1136j-s") 197 .Case("0xb56", "arm1156t2-s") 198 .Case("0xb76", "arm1176jz-s") 199 .Case("0xc08", "cortex-a8") 200 .Case("0xc09", "cortex-a9") 201 .Case("0xc0f", "cortex-a15") 202 .Case("0xc20", "cortex-m0") 203 .Case("0xc23", "cortex-m3") 204 .Case("0xc24", "cortex-m4") 205 .Case("0xd24", "cortex-m52") 206 .Case("0xd22", "cortex-m55") 207 .Case("0xd02", "cortex-a34") 208 .Case("0xd04", "cortex-a35") 209 .Case("0xd03", "cortex-a53") 210 .Case("0xd05", "cortex-a55") 211 .Case("0xd46", "cortex-a510") 212 .Case("0xd80", "cortex-a520") 213 .Case("0xd07", "cortex-a57") 214 .Case("0xd08", "cortex-a72") 215 .Case("0xd09", "cortex-a73") 216 .Case("0xd0a", "cortex-a75") 217 .Case("0xd0b", "cortex-a76") 218 .Case("0xd0d", "cortex-a77") 219 .Case("0xd41", "cortex-a78") 220 .Case("0xd47", "cortex-a710") 221 .Case("0xd4d", "cortex-a715") 222 .Case("0xd81", "cortex-a720") 223 .Case("0xd44", "cortex-x1") 224 .Case("0xd4c", "cortex-x1c") 225 .Case("0xd48", "cortex-x2") 226 .Case("0xd4e", "cortex-x3") 227 .Case("0xd82", "cortex-x4") 228 .Case("0xd0c", "neoverse-n1") 229 .Case("0xd49", "neoverse-n2") 230 .Case("0xd40", "neoverse-v1") 231 .Case("0xd4f", "neoverse-v2") 232 .Default("generic"); 233 } 234 235 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 236 return StringSwitch<const char *>(Part) 237 .Case("0x516", "thunderx2t99") 238 .Case("0x0516", "thunderx2t99") 239 .Case("0xaf", "thunderx2t99") 240 .Case("0x0af", "thunderx2t99") 241 .Case("0xa1", "thunderxt88") 242 .Case("0x0a1", "thunderxt88") 243 .Default("generic"); 244 } 245 246 if (Implementer == "0x46") { // Fujitsu Ltd. 
247 return StringSwitch<const char *>(Part) 248 .Case("0x001", "a64fx") 249 .Default("generic"); 250 } 251 252 if (Implementer == "0x4e") { // NVIDIA Corporation 253 return StringSwitch<const char *>(Part) 254 .Case("0x004", "carmel") 255 .Default("generic"); 256 } 257 258 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 259 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 260 // values correspond to the "Part number" in the CP15/c0 register. The 261 // contents are specified in the various processor manuals. 262 return StringSwitch<const char *>(Part) 263 .Case("0xd01", "tsv110") 264 .Default("generic"); 265 266 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 267 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 268 // values correspond to the "Part number" in the CP15/c0 register. The 269 // contents are specified in the various processor manuals. 270 return StringSwitch<const char *>(Part) 271 .Case("0x06f", "krait") // APQ8064 272 .Case("0x201", "kryo") 273 .Case("0x205", "kryo") 274 .Case("0x211", "kryo") 275 .Case("0x800", "cortex-a73") // Kryo 2xx Gold 276 .Case("0x801", "cortex-a73") // Kryo 2xx Silver 277 .Case("0x802", "cortex-a75") // Kryo 3xx Gold 278 .Case("0x803", "cortex-a75") // Kryo 3xx Silver 279 .Case("0x804", "cortex-a76") // Kryo 4xx Gold 280 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver 281 .Case("0xc00", "falkor") 282 .Case("0xc01", "saphira") 283 .Default("generic"); 284 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 285 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 286 // any predictive pattern across variants and parts. 287 unsigned Variant = 0, Part = 0; 288 289 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 290 // number, corresponding to the Variant bits in the CP15/C0 register. 
291 for (auto I : Lines) 292 if (I.consume_front("CPU variant")) 293 I.ltrim("\t :").getAsInteger(0, Variant); 294 295 // Look for the CPU part line, whose value is a 3 digit hexadecimal 296 // number, corresponding to the PartNum bits in the CP15/C0 register. 297 for (auto I : Lines) 298 if (I.consume_front("CPU part")) 299 I.ltrim("\t :").getAsInteger(0, Part); 300 301 unsigned Exynos = (Variant << 12) | Part; 302 switch (Exynos) { 303 default: 304 // Default by falling through to Exynos M3. 305 [[fallthrough]]; 306 case 0x1002: 307 return "exynos-m3"; 308 case 0x1003: 309 return "exynos-m4"; 310 } 311 } 312 313 if (Implementer == "0xc0") { // Ampere Computing 314 return StringSwitch<const char *>(Part) 315 .Case("0xac3", "ampere1") 316 .Case("0xac4", "ampere1a") 317 .Default("generic"); 318 } 319 320 return "generic"; 321 } 322 323 namespace { 324 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { 325 switch (Id) { 326 case 2064: // z900 not supported by LLVM 327 case 2066: 328 case 2084: // z990 not supported by LLVM 329 case 2086: 330 case 2094: // z9-109 not supported by LLVM 331 case 2096: 332 return "generic"; 333 case 2097: 334 case 2098: 335 return "z10"; 336 case 2817: 337 case 2818: 338 return "z196"; 339 case 2827: 340 case 2828: 341 return "zEC12"; 342 case 2964: 343 case 2965: 344 return HaveVectorSupport? "z13" : "zEC12"; 345 case 3906: 346 case 3907: 347 return HaveVectorSupport? "z14" : "zEC12"; 348 case 8561: 349 case 8562: 350 return HaveVectorSupport? "z15" : "zEC12"; 351 case 3931: 352 case 3932: 353 default: 354 return HaveVectorSupport? "z16" : "zEC12"; 355 } 356 } 357 } // end anonymous namespace 358 359 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 360 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 361 362 // The "processor 0:" line comes after a fair amount of other information, 363 // including a cache breakdown, but this should be plenty. 
364 SmallVector<StringRef, 32> Lines; 365 ProcCpuinfoContent.split(Lines, "\n"); 366 367 // Look for the CPU features. 368 SmallVector<StringRef, 32> CPUFeatures; 369 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 370 if (Lines[I].starts_with("features")) { 371 size_t Pos = Lines[I].find(':'); 372 if (Pos != StringRef::npos) { 373 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 374 break; 375 } 376 } 377 378 // We need to check for the presence of vector support independently of 379 // the machine type, since we may only use the vector register set when 380 // supported by the kernel (and hypervisor). 381 bool HaveVectorSupport = false; 382 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 383 if (CPUFeatures[I] == "vx") 384 HaveVectorSupport = true; 385 } 386 387 // Now check the processor machine type. 388 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 389 if (Lines[I].starts_with("processor ")) { 390 size_t Pos = Lines[I].find("machine = "); 391 if (Pos != StringRef::npos) { 392 Pos += sizeof("machine = ") - 1; 393 unsigned int Id; 394 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) 395 return getCPUNameFromS390Model(Id, HaveVectorSupport); 396 } 397 break; 398 } 399 } 400 401 return "generic"; 402 } 403 404 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { 405 // There are 24 lines in /proc/cpuinfo 406 SmallVector<StringRef> Lines; 407 ProcCpuinfoContent.split(Lines, "\n"); 408 409 // Look for uarch line to determine cpu name 410 StringRef UArch; 411 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 412 if (Lines[I].starts_with("uarch")) { 413 UArch = Lines[I].substr(5).ltrim("\t :"); 414 break; 415 } 416 } 417 418 return StringSwitch<const char *>(UArch) 419 .Case("sifive,u74-mc", "sifive-u74") 420 .Case("sifive,bullet0", "sifive-u74") 421 .Default("generic"); 422 } 423 424 StringRef sys::detail::getHostCPUNameForBPF() { 425 #if !defined(__linux__) || !defined(__x86_64__) 426 return "generic"; 427 
#else 428 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 429 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 430 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 431 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 432 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 433 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 434 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 435 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 436 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 437 /* BPF_EXIT_INSN() */ 438 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 439 440 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 441 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 442 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 443 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 444 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 445 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 446 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 447 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 448 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 449 /* BPF_EXIT_INSN() */ 450 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 451 452 struct bpf_prog_load_attr { 453 uint32_t prog_type; 454 uint32_t insn_cnt; 455 uint64_t insns; 456 uint64_t license; 457 uint32_t log_level; 458 uint32_t log_size; 459 uint64_t log_buf; 460 uint32_t kern_version; 461 uint32_t prog_flags; 462 } attr = {}; 463 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 464 attr.insn_cnt = 5; 465 attr.insns = (uint64_t)v3_insns; 466 attr.license = (uint64_t)"DUMMY"; 467 468 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 469 sizeof(attr)); 470 if (fd >= 0) { 471 close(fd); 472 return "v3"; 473 } 474 475 /* Clear the whole attr in case its content changed by syscall. 
*/ 476 memset(&attr, 0, sizeof(attr)); 477 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 478 attr.insn_cnt = 5; 479 attr.insns = (uint64_t)v2_insns; 480 attr.license = (uint64_t)"DUMMY"; 481 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 482 if (fd >= 0) { 483 close(fd); 484 return "v2"; 485 } 486 return "v1"; 487 #endif 488 } 489 490 #if defined(__i386__) || defined(_M_IX86) || \ 491 defined(__x86_64__) || defined(_M_X64) 492 493 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 494 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 495 // support. Consequently, for i386, the presence of CPUID is checked first 496 // via the corresponding eflags bit. 497 // Removal of cpuid.h header motivated by PR30384 498 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 499 // or test-suite, but are used in external projects e.g. libstdcxx 500 static bool isCpuIdSupported() { 501 #if defined(__GNUC__) || defined(__clang__) 502 #if defined(__i386__) 503 int __cpuid_supported; 504 __asm__(" pushfl\n" 505 " popl %%eax\n" 506 " movl %%eax,%%ecx\n" 507 " xorl $0x00200000,%%eax\n" 508 " pushl %%eax\n" 509 " popfl\n" 510 " pushfl\n" 511 " popl %%eax\n" 512 " movl $0,%0\n" 513 " cmpl %%eax,%%ecx\n" 514 " je 1f\n" 515 " movl $1,%0\n" 516 "1:" 517 : "=r"(__cpuid_supported) 518 : 519 : "eax", "ecx"); 520 if (!__cpuid_supported) 521 return false; 522 #endif 523 return true; 524 #endif 525 return true; 526 } 527 528 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 529 /// the specified arguments. If we can't run cpuid on the host, return true. 530 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 531 unsigned *rECX, unsigned *rEDX) { 532 #if defined(__GNUC__) || defined(__clang__) 533 #if defined(__x86_64__) 534 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 
535 // FIXME: should we save this for Clang? 536 __asm__("movq\t%%rbx, %%rsi\n\t" 537 "cpuid\n\t" 538 "xchgq\t%%rbx, %%rsi\n\t" 539 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 540 : "a"(value)); 541 return false; 542 #elif defined(__i386__) 543 __asm__("movl\t%%ebx, %%esi\n\t" 544 "cpuid\n\t" 545 "xchgl\t%%ebx, %%esi\n\t" 546 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 547 : "a"(value)); 548 return false; 549 #else 550 return true; 551 #endif 552 #elif defined(_MSC_VER) 553 // The MSVC intrinsic is portable across x86 and x64. 554 int registers[4]; 555 __cpuid(registers, value); 556 *rEAX = registers[0]; 557 *rEBX = registers[1]; 558 *rECX = registers[2]; 559 *rEDX = registers[3]; 560 return false; 561 #else 562 return true; 563 #endif 564 } 565 566 namespace llvm { 567 namespace sys { 568 namespace detail { 569 namespace x86 { 570 571 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 572 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 573 if (MaxLeaf == nullptr) 574 MaxLeaf = &EAX; 575 else 576 *MaxLeaf = 0; 577 578 if (!isCpuIdSupported()) 579 return VendorSignatures::UNKNOWN; 580 581 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) 582 return VendorSignatures::UNKNOWN; 583 584 // "Genu ineI ntel" 585 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) 586 return VendorSignatures::GENUINE_INTEL; 587 588 // "Auth enti cAMD" 589 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) 590 return VendorSignatures::AUTHENTIC_AMD; 591 592 return VendorSignatures::UNKNOWN; 593 } 594 595 } // namespace x86 596 } // namespace detail 597 } // namespace sys 598 } // namespace llvm 599 600 using namespace llvm::sys::detail::x86; 601 602 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 603 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 604 /// return true. 
605 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 606 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 607 unsigned *rEDX) { 608 #if defined(__GNUC__) || defined(__clang__) 609 #if defined(__x86_64__) 610 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 611 // FIXME: should we save this for Clang? 612 __asm__("movq\t%%rbx, %%rsi\n\t" 613 "cpuid\n\t" 614 "xchgq\t%%rbx, %%rsi\n\t" 615 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 616 : "a"(value), "c"(subleaf)); 617 return false; 618 #elif defined(__i386__) 619 __asm__("movl\t%%ebx, %%esi\n\t" 620 "cpuid\n\t" 621 "xchgl\t%%ebx, %%esi\n\t" 622 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 623 : "a"(value), "c"(subleaf)); 624 return false; 625 #else 626 return true; 627 #endif 628 #elif defined(_MSC_VER) 629 int registers[4]; 630 __cpuidex(registers, value, subleaf); 631 *rEAX = registers[0]; 632 *rEBX = registers[1]; 633 *rECX = registers[2]; 634 *rEDX = registers[3]; 635 return false; 636 #else 637 return true; 638 #endif 639 } 640 641 // Read control register 0 (XCR0). Used to detect features such as AVX. 642 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 643 #if defined(__GNUC__) || defined(__clang__) 644 // Check xgetbv; this uses a .byte sequence instead of the instruction 645 // directly because older assemblers do not include support for xgetbv and 646 // there is no easy way to conditionally compile based on the assembler used. 
647 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 648 return false; 649 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 650 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 651 *rEAX = Result; 652 *rEDX = Result >> 32; 653 return false; 654 #else 655 return true; 656 #endif 657 } 658 659 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 660 unsigned *Model) { 661 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 662 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 663 if (*Family == 6 || *Family == 0xf) { 664 if (*Family == 0xf) 665 // Examine extended family ID if family ID is F. 666 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 667 // Examine extended model ID if family ID is 6 or F. 668 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 669 } 670 } 671 672 static StringRef 673 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 674 const unsigned *Features, 675 unsigned *Type, unsigned *Subtype) { 676 auto testFeature = [&](unsigned F) { 677 return (Features[F / 32] & (1U << (F % 32))) != 0; 678 }; 679 680 StringRef CPU; 681 682 switch (Family) { 683 case 3: 684 CPU = "i386"; 685 break; 686 case 4: 687 CPU = "i486"; 688 break; 689 case 5: 690 if (testFeature(X86::FEATURE_MMX)) { 691 CPU = "pentium-mmx"; 692 break; 693 } 694 CPU = "pentium"; 695 break; 696 case 6: 697 switch (Model) { 698 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 699 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 700 // mobile processor, Intel Core 2 Extreme processor, Intel 701 // Pentium Dual-Core processor, Intel Xeon processor, model 702 // 0Fh. All processors are manufactured using the 65 nm process. 703 case 0x16: // Intel Celeron processor model 16h. All processors are 704 // manufactured using the 65 nm process 705 CPU = "core2"; 706 *Type = X86::INTEL_CORE2; 707 break; 708 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 709 // 17h. 
All processors are manufactured using the 45 nm process. 710 // 711 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 712 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 713 // the 45 nm process. 714 CPU = "penryn"; 715 *Type = X86::INTEL_CORE2; 716 break; 717 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 718 // processors are manufactured using the 45 nm process. 719 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 720 // As found in a Summer 2010 model iMac. 721 case 0x1f: 722 case 0x2e: // Nehalem EX 723 CPU = "nehalem"; 724 *Type = X86::INTEL_COREI7; 725 *Subtype = X86::INTEL_COREI7_NEHALEM; 726 break; 727 case 0x25: // Intel Core i7, laptop version. 728 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 729 // processors are manufactured using the 32 nm process. 730 case 0x2f: // Westmere EX 731 CPU = "westmere"; 732 *Type = X86::INTEL_COREI7; 733 *Subtype = X86::INTEL_COREI7_WESTMERE; 734 break; 735 case 0x2a: // Intel Core i7 processor. All processors are manufactured 736 // using the 32 nm process. 
737 case 0x2d: 738 CPU = "sandybridge"; 739 *Type = X86::INTEL_COREI7; 740 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 741 break; 742 case 0x3a: 743 case 0x3e: // Ivy Bridge EP 744 CPU = "ivybridge"; 745 *Type = X86::INTEL_COREI7; 746 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 747 break; 748 749 // Haswell: 750 case 0x3c: 751 case 0x3f: 752 case 0x45: 753 case 0x46: 754 CPU = "haswell"; 755 *Type = X86::INTEL_COREI7; 756 *Subtype = X86::INTEL_COREI7_HASWELL; 757 break; 758 759 // Broadwell: 760 case 0x3d: 761 case 0x47: 762 case 0x4f: 763 case 0x56: 764 CPU = "broadwell"; 765 *Type = X86::INTEL_COREI7; 766 *Subtype = X86::INTEL_COREI7_BROADWELL; 767 break; 768 769 // Skylake: 770 case 0x4e: // Skylake mobile 771 case 0x5e: // Skylake desktop 772 case 0x8e: // Kaby Lake mobile 773 case 0x9e: // Kaby Lake desktop 774 case 0xa5: // Comet Lake-H/S 775 case 0xa6: // Comet Lake-U 776 CPU = "skylake"; 777 *Type = X86::INTEL_COREI7; 778 *Subtype = X86::INTEL_COREI7_SKYLAKE; 779 break; 780 781 // Rocketlake: 782 case 0xa7: 783 CPU = "rocketlake"; 784 *Type = X86::INTEL_COREI7; 785 *Subtype = X86::INTEL_COREI7_ROCKETLAKE; 786 break; 787 788 // Skylake Xeon: 789 case 0x55: 790 *Type = X86::INTEL_COREI7; 791 if (testFeature(X86::FEATURE_AVX512BF16)) { 792 CPU = "cooperlake"; 793 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 794 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 795 CPU = "cascadelake"; 796 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 797 } else { 798 CPU = "skylake-avx512"; 799 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 800 } 801 break; 802 803 // Cannonlake: 804 case 0x66: 805 CPU = "cannonlake"; 806 *Type = X86::INTEL_COREI7; 807 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 808 break; 809 810 // Icelake: 811 case 0x7d: 812 case 0x7e: 813 CPU = "icelake-client"; 814 *Type = X86::INTEL_COREI7; 815 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 816 break; 817 818 // Tigerlake: 819 case 0x8c: 820 case 0x8d: 821 CPU = "tigerlake"; 822 *Type = X86::INTEL_COREI7; 823 *Subtype = 
X86::INTEL_COREI7_TIGERLAKE; 824 break; 825 826 // Alderlake: 827 case 0x97: 828 case 0x9a: 829 // Gracemont 830 case 0xbe: 831 // Raptorlake: 832 case 0xb7: 833 case 0xba: 834 case 0xbf: 835 // Meteorlake: 836 case 0xaa: 837 case 0xac: 838 CPU = "alderlake"; 839 *Type = X86::INTEL_COREI7; 840 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 841 break; 842 843 // Arrowlake: 844 case 0xc5: 845 CPU = "arrowlake"; 846 *Type = X86::INTEL_COREI7; 847 *Subtype = X86::INTEL_COREI7_ARROWLAKE; 848 break; 849 850 // Arrowlake S: 851 case 0xc6: 852 // Lunarlake: 853 case 0xbd: 854 CPU = "arrowlake-s"; 855 *Type = X86::INTEL_COREI7; 856 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; 857 break; 858 859 // Pantherlake: 860 case 0xcc: 861 CPU = "pantherlake"; 862 *Type = X86::INTEL_COREI7; 863 *Subtype = X86::INTEL_COREI7_PANTHERLAKE; 864 break; 865 866 // Graniterapids: 867 case 0xad: 868 CPU = "graniterapids"; 869 *Type = X86::INTEL_COREI7; 870 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS; 871 break; 872 873 // Granite Rapids D: 874 case 0xae: 875 CPU = "graniterapids-d"; 876 *Type = X86::INTEL_COREI7; 877 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D; 878 break; 879 880 // Icelake Xeon: 881 case 0x6a: 882 case 0x6c: 883 CPU = "icelake-server"; 884 *Type = X86::INTEL_COREI7; 885 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 886 break; 887 888 // Emerald Rapids: 889 case 0xcf: 890 // Sapphire Rapids: 891 case 0x8f: 892 CPU = "sapphirerapids"; 893 *Type = X86::INTEL_COREI7; 894 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; 895 break; 896 897 case 0x1c: // Most 45 nm Intel Atom processors 898 case 0x26: // 45 nm Atom Lincroft 899 case 0x27: // 32 nm Atom Medfield 900 case 0x35: // 32 nm Atom Midview 901 case 0x36: // 32 nm Atom Midview 902 CPU = "bonnell"; 903 *Type = X86::INTEL_BONNELL; 904 break; 905 906 // Atom Silvermont codes from the Intel software optimization guide. 
907 case 0x37: 908 case 0x4a: 909 case 0x4d: 910 case 0x5a: 911 case 0x5d: 912 case 0x4c: // really airmont 913 CPU = "silvermont"; 914 *Type = X86::INTEL_SILVERMONT; 915 break; 916 // Goldmont: 917 case 0x5c: // Apollo Lake 918 case 0x5f: // Denverton 919 CPU = "goldmont"; 920 *Type = X86::INTEL_GOLDMONT; 921 break; 922 case 0x7a: 923 CPU = "goldmont-plus"; 924 *Type = X86::INTEL_GOLDMONT_PLUS; 925 break; 926 case 0x86: 927 case 0x8a: // Lakefield 928 case 0x96: // Elkhart Lake 929 case 0x9c: // Jasper Lake 930 CPU = "tremont"; 931 *Type = X86::INTEL_TREMONT; 932 break; 933 934 // Sierraforest: 935 case 0xaf: 936 CPU = "sierraforest"; 937 *Type = X86::INTEL_SIERRAFOREST; 938 break; 939 940 // Grandridge: 941 case 0xb6: 942 CPU = "grandridge"; 943 *Type = X86::INTEL_GRANDRIDGE; 944 break; 945 946 // Clearwaterforest: 947 case 0xdd: 948 CPU = "clearwaterforest"; 949 *Type = X86::INTEL_CLEARWATERFOREST; 950 break; 951 952 // Xeon Phi (Knights Landing + Knights Mill): 953 case 0x57: 954 CPU = "knl"; 955 *Type = X86::INTEL_KNL; 956 break; 957 case 0x85: 958 CPU = "knm"; 959 *Type = X86::INTEL_KNM; 960 break; 961 962 default: // Unknown family 6 CPU, try to guess. 963 // Don't both with Type/Subtype here, they aren't used by the caller. 964 // They're used above to keep the code in sync with compiler-rt. 
965 // TODO detect tigerlake host from model 966 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 967 CPU = "tigerlake"; 968 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 969 CPU = "icelake-client"; 970 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 971 CPU = "cannonlake"; 972 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 973 CPU = "cooperlake"; 974 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 975 CPU = "cascadelake"; 976 } else if (testFeature(X86::FEATURE_AVX512VL)) { 977 CPU = "skylake-avx512"; 978 } else if (testFeature(X86::FEATURE_AVX512ER)) { 979 CPU = "knl"; 980 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 981 if (testFeature(X86::FEATURE_SHA)) 982 CPU = "goldmont"; 983 else 984 CPU = "skylake"; 985 } else if (testFeature(X86::FEATURE_ADX)) { 986 CPU = "broadwell"; 987 } else if (testFeature(X86::FEATURE_AVX2)) { 988 CPU = "haswell"; 989 } else if (testFeature(X86::FEATURE_AVX)) { 990 CPU = "sandybridge"; 991 } else if (testFeature(X86::FEATURE_SSE4_2)) { 992 if (testFeature(X86::FEATURE_MOVBE)) 993 CPU = "silvermont"; 994 else 995 CPU = "nehalem"; 996 } else if (testFeature(X86::FEATURE_SSE4_1)) { 997 CPU = "penryn"; 998 } else if (testFeature(X86::FEATURE_SSSE3)) { 999 if (testFeature(X86::FEATURE_MOVBE)) 1000 CPU = "bonnell"; 1001 else 1002 CPU = "core2"; 1003 } else if (testFeature(X86::FEATURE_64BIT)) { 1004 CPU = "core2"; 1005 } else if (testFeature(X86::FEATURE_SSE3)) { 1006 CPU = "yonah"; 1007 } else if (testFeature(X86::FEATURE_SSE2)) { 1008 CPU = "pentium-m"; 1009 } else if (testFeature(X86::FEATURE_SSE)) { 1010 CPU = "pentium3"; 1011 } else if (testFeature(X86::FEATURE_MMX)) { 1012 CPU = "pentium2"; 1013 } else { 1014 CPU = "pentiumpro"; 1015 } 1016 break; 1017 } 1018 break; 1019 case 15: { 1020 if (testFeature(X86::FEATURE_64BIT)) { 1021 CPU = "nocona"; 1022 break; 1023 } 1024 if (testFeature(X86::FEATURE_SSE3)) { 1025 CPU = "prescott"; 1026 break; 1027 } 1028 CPU = "pentium4"; 1029 break; 1030 } 1031 default: 1032 
break; // Unknown. 1033 } 1034 1035 return CPU; 1036 } 1037 1038 static StringRef 1039 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 1040 const unsigned *Features, 1041 unsigned *Type, unsigned *Subtype) { 1042 auto testFeature = [&](unsigned F) { 1043 return (Features[F / 32] & (1U << (F % 32))) != 0; 1044 }; 1045 1046 StringRef CPU; 1047 1048 switch (Family) { 1049 case 4: 1050 CPU = "i486"; 1051 break; 1052 case 5: 1053 CPU = "pentium"; 1054 switch (Model) { 1055 case 6: 1056 case 7: 1057 CPU = "k6"; 1058 break; 1059 case 8: 1060 CPU = "k6-2"; 1061 break; 1062 case 9: 1063 case 13: 1064 CPU = "k6-3"; 1065 break; 1066 case 10: 1067 CPU = "geode"; 1068 break; 1069 } 1070 break; 1071 case 6: 1072 if (testFeature(X86::FEATURE_SSE)) { 1073 CPU = "athlon-xp"; 1074 break; 1075 } 1076 CPU = "athlon"; 1077 break; 1078 case 15: 1079 if (testFeature(X86::FEATURE_SSE3)) { 1080 CPU = "k8-sse3"; 1081 break; 1082 } 1083 CPU = "k8"; 1084 break; 1085 case 16: 1086 CPU = "amdfam10"; 1087 *Type = X86::AMDFAM10H; // "amdfam10" 1088 switch (Model) { 1089 case 2: 1090 *Subtype = X86::AMDFAM10H_BARCELONA; 1091 break; 1092 case 4: 1093 *Subtype = X86::AMDFAM10H_SHANGHAI; 1094 break; 1095 case 8: 1096 *Subtype = X86::AMDFAM10H_ISTANBUL; 1097 break; 1098 } 1099 break; 1100 case 20: 1101 CPU = "btver1"; 1102 *Type = X86::AMD_BTVER1; 1103 break; 1104 case 21: 1105 CPU = "bdver1"; 1106 *Type = X86::AMDFAM15H; 1107 if (Model >= 0x60 && Model <= 0x7f) { 1108 CPU = "bdver4"; 1109 *Subtype = X86::AMDFAM15H_BDVER4; 1110 break; // 60h-7Fh: Excavator 1111 } 1112 if (Model >= 0x30 && Model <= 0x3f) { 1113 CPU = "bdver3"; 1114 *Subtype = X86::AMDFAM15H_BDVER3; 1115 break; // 30h-3Fh: Steamroller 1116 } 1117 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 1118 CPU = "bdver2"; 1119 *Subtype = X86::AMDFAM15H_BDVER2; 1120 break; // 02h, 10h-1Fh: Piledriver 1121 } 1122 if (Model <= 0x0f) { 1123 *Subtype = X86::AMDFAM15H_BDVER1; 1124 break; // 00h-0Fh: Bulldozer 1125 } 1126 
break; 1127 case 22: 1128 CPU = "btver2"; 1129 *Type = X86::AMD_BTVER2; 1130 break; 1131 case 23: 1132 CPU = "znver1"; 1133 *Type = X86::AMDFAM17H; 1134 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 1135 CPU = "znver2"; 1136 *Subtype = X86::AMDFAM17H_ZNVER2; 1137 break; // 30h-3fh, 71h: Zen2 1138 } 1139 if (Model <= 0x0f) { 1140 *Subtype = X86::AMDFAM17H_ZNVER1; 1141 break; // 00h-0Fh: Zen1 1142 } 1143 break; 1144 case 25: 1145 CPU = "znver3"; 1146 *Type = X86::AMDFAM19H; 1147 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) { 1148 // Family 19h Models 00h-0Fh - Zen3 1149 // Family 19h Models 20h-2Fh - Zen3 1150 // Family 19h Models 30h-3Fh - Zen3 1151 // Family 19h Models 40h-4Fh - Zen3+ 1152 // Family 19h Models 50h-5Fh - Zen3+ 1153 *Subtype = X86::AMDFAM19H_ZNVER3; 1154 break; 1155 } 1156 if ((Model >= 0x10 && Model <= 0x1f) || 1157 (Model >= 0x60 && Model <= 0x74) || 1158 (Model >= 0x78 && Model <= 0x7b) || 1159 (Model >= 0xA0 && Model <= 0xAf)) { 1160 CPU = "znver4"; 1161 *Subtype = X86::AMDFAM19H_ZNVER4; 1162 break; // "znver4" 1163 } 1164 break; // family 19h 1165 default: 1166 break; // Unknown AMD CPU. 
1167 } 1168 1169 return CPU; 1170 } 1171 1172 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 1173 unsigned *Features) { 1174 unsigned EAX, EBX; 1175 1176 auto setFeature = [&](unsigned F) { 1177 Features[F / 32] |= 1U << (F % 32); 1178 }; 1179 1180 if ((EDX >> 15) & 1) 1181 setFeature(X86::FEATURE_CMOV); 1182 if ((EDX >> 23) & 1) 1183 setFeature(X86::FEATURE_MMX); 1184 if ((EDX >> 25) & 1) 1185 setFeature(X86::FEATURE_SSE); 1186 if ((EDX >> 26) & 1) 1187 setFeature(X86::FEATURE_SSE2); 1188 1189 if ((ECX >> 0) & 1) 1190 setFeature(X86::FEATURE_SSE3); 1191 if ((ECX >> 1) & 1) 1192 setFeature(X86::FEATURE_PCLMUL); 1193 if ((ECX >> 9) & 1) 1194 setFeature(X86::FEATURE_SSSE3); 1195 if ((ECX >> 12) & 1) 1196 setFeature(X86::FEATURE_FMA); 1197 if ((ECX >> 19) & 1) 1198 setFeature(X86::FEATURE_SSE4_1); 1199 if ((ECX >> 20) & 1) { 1200 setFeature(X86::FEATURE_SSE4_2); 1201 setFeature(X86::FEATURE_CRC32); 1202 } 1203 if ((ECX >> 23) & 1) 1204 setFeature(X86::FEATURE_POPCNT); 1205 if ((ECX >> 25) & 1) 1206 setFeature(X86::FEATURE_AES); 1207 1208 if ((ECX >> 22) & 1) 1209 setFeature(X86::FEATURE_MOVBE); 1210 1211 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1212 // indicates that the AVX registers will be saved and restored on context 1213 // switch, then we have full AVX support. 1214 const unsigned AVXBits = (1 << 27) | (1 << 28); 1215 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1216 ((EAX & 0x6) == 0x6); 1217 #if defined(__APPLE__) 1218 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1219 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1220 // set right now. 1221 bool HasAVX512Save = true; 1222 #else 1223 // AVX512 requires additional context to be saved by the OS. 
1224 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1225 #endif 1226 1227 if (HasAVX) 1228 setFeature(X86::FEATURE_AVX); 1229 1230 bool HasLeaf7 = 1231 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1232 1233 if (HasLeaf7 && ((EBX >> 3) & 1)) 1234 setFeature(X86::FEATURE_BMI); 1235 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1236 setFeature(X86::FEATURE_AVX2); 1237 if (HasLeaf7 && ((EBX >> 8) & 1)) 1238 setFeature(X86::FEATURE_BMI2); 1239 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1240 setFeature(X86::FEATURE_AVX512F); 1241 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1242 setFeature(X86::FEATURE_AVX512DQ); 1243 if (HasLeaf7 && ((EBX >> 19) & 1)) 1244 setFeature(X86::FEATURE_ADX); 1245 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1246 setFeature(X86::FEATURE_AVX512IFMA); 1247 if (HasLeaf7 && ((EBX >> 23) & 1)) 1248 setFeature(X86::FEATURE_CLFLUSHOPT); 1249 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1250 setFeature(X86::FEATURE_AVX512PF); 1251 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1252 setFeature(X86::FEATURE_AVX512ER); 1253 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1254 setFeature(X86::FEATURE_AVX512CD); 1255 if (HasLeaf7 && ((EBX >> 29) & 1)) 1256 setFeature(X86::FEATURE_SHA); 1257 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1258 setFeature(X86::FEATURE_AVX512BW); 1259 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1260 setFeature(X86::FEATURE_AVX512VL); 1261 1262 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1263 setFeature(X86::FEATURE_AVX512VBMI); 1264 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1265 setFeature(X86::FEATURE_AVX512VBMI2); 1266 if (HasLeaf7 && ((ECX >> 8) & 1)) 1267 setFeature(X86::FEATURE_GFNI); 1268 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1269 setFeature(X86::FEATURE_VPCLMULQDQ); 1270 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1271 setFeature(X86::FEATURE_AVX512VNNI); 1272 if (HasLeaf7 && ((ECX >> 12) & 1) && 
HasAVX512Save) 1273 setFeature(X86::FEATURE_AVX512BITALG); 1274 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1275 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1276 1277 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1278 setFeature(X86::FEATURE_AVX5124VNNIW); 1279 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1280 setFeature(X86::FEATURE_AVX5124FMAPS); 1281 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1282 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1283 1284 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1285 // return all 0s for invalid subleaves so check the limit. 1286 bool HasLeaf7Subleaf1 = 1287 HasLeaf7 && EAX >= 1 && 1288 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1289 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1290 setFeature(X86::FEATURE_AVX512BF16); 1291 1292 unsigned MaxExtLevel; 1293 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1294 1295 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1296 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1297 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1298 setFeature(X86::FEATURE_SSE4_A); 1299 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1300 setFeature(X86::FEATURE_XOP); 1301 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1302 setFeature(X86::FEATURE_FMA4); 1303 1304 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1305 setFeature(X86::FEATURE_64BIT); 1306 } 1307 1308 StringRef sys::getHostCPUName() { 1309 unsigned MaxLeaf = 0; 1310 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1311 if (Vendor == VendorSignatures::UNKNOWN) 1312 return "generic"; 1313 1314 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1315 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1316 1317 unsigned Family = 0, Model = 0; 1318 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1319 detectX86FamilyModel(EAX, &Family, &Model); 1320 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1321 1322 // These aren't consumed in this file, but we try to keep some 
source code the 1323 // same or similar to compiler-rt. 1324 unsigned Type = 0; 1325 unsigned Subtype = 0; 1326 1327 StringRef CPU; 1328 1329 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1330 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1331 &Subtype); 1332 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1333 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1334 &Subtype); 1335 } 1336 1337 if (!CPU.empty()) 1338 return CPU; 1339 1340 return "generic"; 1341 } 1342 1343 #elif defined(__APPLE__) && defined(__powerpc__) 1344 StringRef sys::getHostCPUName() { 1345 host_basic_info_data_t hostInfo; 1346 mach_msg_type_number_t infoCount; 1347 1348 infoCount = HOST_BASIC_INFO_COUNT; 1349 mach_port_t hostPort = mach_host_self(); 1350 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1351 &infoCount); 1352 mach_port_deallocate(mach_task_self(), hostPort); 1353 1354 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1355 return "generic"; 1356 1357 switch (hostInfo.cpu_subtype) { 1358 case CPU_SUBTYPE_POWERPC_601: 1359 return "601"; 1360 case CPU_SUBTYPE_POWERPC_602: 1361 return "602"; 1362 case CPU_SUBTYPE_POWERPC_603: 1363 return "603"; 1364 case CPU_SUBTYPE_POWERPC_603e: 1365 return "603e"; 1366 case CPU_SUBTYPE_POWERPC_603ev: 1367 return "603ev"; 1368 case CPU_SUBTYPE_POWERPC_604: 1369 return "604"; 1370 case CPU_SUBTYPE_POWERPC_604e: 1371 return "604e"; 1372 case CPU_SUBTYPE_POWERPC_620: 1373 return "620"; 1374 case CPU_SUBTYPE_POWERPC_750: 1375 return "750"; 1376 case CPU_SUBTYPE_POWERPC_7400: 1377 return "7400"; 1378 case CPU_SUBTYPE_POWERPC_7450: 1379 return "7450"; 1380 case CPU_SUBTYPE_POWERPC_970: 1381 return "970"; 1382 default:; 1383 } 1384 1385 return "generic"; 1386 } 1387 #elif defined(__linux__) && defined(__powerpc__) 1388 StringRef sys::getHostCPUName() { 1389 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1390 StringRef Content = P ? 
P->getBuffer() : ""; 1391 return detail::getHostCPUNameForPowerPC(Content); 1392 } 1393 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1394 StringRef sys::getHostCPUName() { 1395 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1396 StringRef Content = P ? P->getBuffer() : ""; 1397 return detail::getHostCPUNameForARM(Content); 1398 } 1399 #elif defined(__linux__) && defined(__s390x__) 1400 StringRef sys::getHostCPUName() { 1401 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1402 StringRef Content = P ? P->getBuffer() : ""; 1403 return detail::getHostCPUNameForS390x(Content); 1404 } 1405 #elif defined(__MVS__) 1406 StringRef sys::getHostCPUName() { 1407 // Get pointer to Communications Vector Table (CVT). 1408 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1409 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1410 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1411 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1412 // of address. 1413 int ReadValue = *StartToCVTOffset; 1414 // Explicitly clear the high order bit. 1415 ReadValue = (ReadValue & 0x7FFFFFFF); 1416 char *CVT = reinterpret_cast<char *>(ReadValue); 1417 // The model number is located in the CVT prefix at offset -6 and stored as 1418 // signless packed decimal. 1419 uint16_t Id = *(uint16_t *)&CVT[-6]; 1420 // Convert number to integer. 1421 Id = decodePackedBCD<uint16_t>(Id, false); 1422 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1423 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1424 // extension can only be used if bit CVTVEF is on. 
1425 bool HaveVectorSupport = CVT[244] & 0x80; 1426 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1427 } 1428 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 1429 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 1430 #define CPUFAMILY_ARM_CYCLONE 0x37a09642 1431 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e 1432 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 1433 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 1434 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 1435 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f 1436 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 1437 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 1438 1439 StringRef sys::getHostCPUName() { 1440 uint32_t Family; 1441 size_t Length = sizeof(Family); 1442 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); 1443 1444 switch (Family) { 1445 case CPUFAMILY_ARM_SWIFT: 1446 return "swift"; 1447 case CPUFAMILY_ARM_CYCLONE: 1448 return "apple-a7"; 1449 case CPUFAMILY_ARM_TYPHOON: 1450 return "apple-a8"; 1451 case CPUFAMILY_ARM_TWISTER: 1452 return "apple-a9"; 1453 case CPUFAMILY_ARM_HURRICANE: 1454 return "apple-a10"; 1455 case CPUFAMILY_ARM_MONSOON_MISTRAL: 1456 return "apple-a11"; 1457 case CPUFAMILY_ARM_VORTEX_TEMPEST: 1458 return "apple-a12"; 1459 case CPUFAMILY_ARM_LIGHTNING_THUNDER: 1460 return "apple-a13"; 1461 case CPUFAMILY_ARM_FIRESTORM_ICESTORM: 1462 return "apple-m1"; 1463 default: 1464 // Default to the newest CPU we know about. 
1465 return "apple-m1"; 1466 } 1467 } 1468 #elif defined(_AIX) 1469 StringRef sys::getHostCPUName() { 1470 switch (_system_configuration.implementation) { 1471 case POWER_4: 1472 if (_system_configuration.version == PV_4_3) 1473 return "970"; 1474 return "pwr4"; 1475 case POWER_5: 1476 if (_system_configuration.version == PV_5) 1477 return "pwr5"; 1478 return "pwr5x"; 1479 case POWER_6: 1480 if (_system_configuration.version == PV_6_Compat) 1481 return "pwr6"; 1482 return "pwr6x"; 1483 case POWER_7: 1484 return "pwr7"; 1485 case POWER_8: 1486 return "pwr8"; 1487 case POWER_9: 1488 return "pwr9"; 1489 // TODO: simplify this once the macro is available in all OS levels. 1490 #ifdef POWER_10 1491 case POWER_10: 1492 #else 1493 case 0x40000: 1494 #endif 1495 return "pwr10"; 1496 default: 1497 return "generic"; 1498 } 1499 } 1500 #elif defined(__loongarch__) 1501 StringRef sys::getHostCPUName() { 1502 // Use processor id to detect cpu name. 1503 uint32_t processor_id; 1504 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); 1505 switch (processor_id & 0xff00) { 1506 case 0xc000: // Loongson 64bit, 4-issue 1507 return "la464"; 1508 // TODO: Others. 1509 default: 1510 break; 1511 } 1512 return "generic"; 1513 } 1514 #elif defined(__riscv) 1515 StringRef sys::getHostCPUName() { 1516 #if defined(__linux__) 1517 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1518 StringRef Content = P ? 
P->getBuffer() : ""; 1519 return detail::getHostCPUNameForRISCV(Content); 1520 #else 1521 #if __riscv_xlen == 64 1522 return "generic-rv64"; 1523 #elif __riscv_xlen == 32 1524 return "generic-rv32"; 1525 #else 1526 #error "Unhandled value of __riscv_xlen" 1527 #endif 1528 #endif 1529 } 1530 #elif defined(__sparc__) 1531 #if defined(__linux__) 1532 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { 1533 SmallVector<StringRef> Lines; 1534 ProcCpuinfoContent.split(Lines, "\n"); 1535 1536 // Look for cpu line to determine cpu name 1537 StringRef Cpu; 1538 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1539 if (Lines[I].starts_with("cpu")) { 1540 Cpu = Lines[I].substr(5).ltrim("\t :"); 1541 break; 1542 } 1543 } 1544 1545 return StringSwitch<const char *>(Cpu) 1546 .StartsWith("SuperSparc", "supersparc") 1547 .StartsWith("HyperSparc", "hypersparc") 1548 .StartsWith("SpitFire", "ultrasparc") 1549 .StartsWith("BlackBird", "ultrasparc") 1550 .StartsWith("Sabre", " ultrasparc") 1551 .StartsWith("Hummingbird", "ultrasparc") 1552 .StartsWith("Cheetah", "ultrasparc3") 1553 .StartsWith("Jalapeno", "ultrasparc3") 1554 .StartsWith("Jaguar", "ultrasparc3") 1555 .StartsWith("Panther", "ultrasparc3") 1556 .StartsWith("Serrano", "ultrasparc3") 1557 .StartsWith("UltraSparc T1", "niagara") 1558 .StartsWith("UltraSparc T2", "niagara2") 1559 .StartsWith("UltraSparc T3", "niagara3") 1560 .StartsWith("UltraSparc T4", "niagara4") 1561 .StartsWith("UltraSparc T5", "niagara4") 1562 .StartsWith("LEON", "leon3") 1563 // niagara7/m8 not supported by LLVM yet. 1564 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) 1565 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) 1566 .StartsWith("SPARC-M8", "niagara4" /* "m8" */) 1567 .Default("generic"); 1568 } 1569 #endif 1570 1571 StringRef sys::getHostCPUName() { 1572 #if defined(__linux__) 1573 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1574 StringRef Content = P ? 
P->getBuffer() : ""; 1575 return detail::getHostCPUNameForSPARC(Content); 1576 #elif defined(__sun__) && defined(__svr4__) 1577 char *buf = NULL; 1578 kstat_ctl_t *kc; 1579 kstat_t *ksp; 1580 kstat_named_t *brand = NULL; 1581 1582 kc = kstat_open(); 1583 if (kc != NULL) { 1584 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); 1585 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && 1586 ksp->ks_type == KSTAT_TYPE_NAMED) 1587 brand = 1588 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); 1589 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) 1590 buf = KSTAT_NAMED_STR_PTR(brand); 1591 } 1592 kstat_close(kc); 1593 1594 return StringSwitch<const char *>(buf) 1595 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I 1596 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I 1597 .Case("TMS390Z55", 1598 "supersparc") // Texas Instruments SuperSPARC I with SuperCache 1599 .Case("MB86904", "supersparc") // Fujitsu microSPARC II 1600 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC 1601 .Case("RT623", "hypersparc") // Ross hyperSPARC 1602 .Case("RT625", "hypersparc") 1603 .Case("RT626", "hypersparc") 1604 .Case("UltraSPARC-I", "ultrasparc") 1605 .Case("UltraSPARC-II", "ultrasparc") 1606 .Case("UltraSPARC-IIe", "ultrasparc") 1607 .Case("UltraSPARC-IIi", "ultrasparc") 1608 .Case("SPARC64-III", "ultrasparc") 1609 .Case("SPARC64-IV", "ultrasparc") 1610 .Case("UltraSPARC-III", "ultrasparc3") 1611 .Case("UltraSPARC-III+", "ultrasparc3") 1612 .Case("UltraSPARC-IIIi", "ultrasparc3") 1613 .Case("UltraSPARC-IIIi+", "ultrasparc3") 1614 .Case("UltraSPARC-IV", "ultrasparc3") 1615 .Case("UltraSPARC-IV+", "ultrasparc3") 1616 .Case("SPARC64-V", "ultrasparc3") 1617 .Case("SPARC64-VI", "ultrasparc3") 1618 .Case("SPARC64-VII", "ultrasparc3") 1619 .Case("UltraSPARC-T1", "niagara") 1620 .Case("UltraSPARC-T2", "niagara2") 1621 .Case("UltraSPARC-T2", "niagara2") 1622 .Case("UltraSPARC-T2+", "niagara2") 1623 
.Case("SPARC-T3", "niagara3") 1624 .Case("SPARC-T4", "niagara4") 1625 .Case("SPARC-T5", "niagara4") 1626 // niagara7/m8 not supported by LLVM yet. 1627 .Case("SPARC-M7", "niagara4" /* "niagara7" */) 1628 .Case("SPARC-S7", "niagara4" /* "niagara7" */) 1629 .Case("SPARC-M8", "niagara4" /* "m8" */) 1630 .Default("generic"); 1631 #else 1632 return "generic"; 1633 #endif 1634 } 1635 #else 1636 StringRef sys::getHostCPUName() { return "generic"; } 1637 namespace llvm { 1638 namespace sys { 1639 namespace detail { 1640 namespace x86 { 1641 1642 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1643 return VendorSignatures::UNKNOWN; 1644 } 1645 1646 } // namespace x86 1647 } // namespace detail 1648 } // namespace sys 1649 } // namespace llvm 1650 #endif 1651 1652 #if defined(__i386__) || defined(_M_IX86) || \ 1653 defined(__x86_64__) || defined(_M_X64) 1654 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1655 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1656 unsigned MaxLevel; 1657 1658 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1659 return false; 1660 1661 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1662 1663 Features["cx8"] = (EDX >> 8) & 1; 1664 Features["cmov"] = (EDX >> 15) & 1; 1665 Features["mmx"] = (EDX >> 23) & 1; 1666 Features["fxsr"] = (EDX >> 24) & 1; 1667 Features["sse"] = (EDX >> 25) & 1; 1668 Features["sse2"] = (EDX >> 26) & 1; 1669 1670 Features["sse3"] = (ECX >> 0) & 1; 1671 Features["pclmul"] = (ECX >> 1) & 1; 1672 Features["ssse3"] = (ECX >> 9) & 1; 1673 Features["cx16"] = (ECX >> 13) & 1; 1674 Features["sse4.1"] = (ECX >> 19) & 1; 1675 Features["sse4.2"] = (ECX >> 20) & 1; 1676 Features["crc32"] = Features["sse4.2"]; 1677 Features["movbe"] = (ECX >> 22) & 1; 1678 Features["popcnt"] = (ECX >> 23) & 1; 1679 Features["aes"] = (ECX >> 25) & 1; 1680 Features["rdrnd"] = (ECX >> 30) & 1; 1681 1682 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1683 // indicates that the AVX registers will 
be saved and restored on context 1684 // switch, then we have full AVX support. 1685 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1686 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1687 #if defined(__APPLE__) 1688 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1689 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1690 // set right now. 1691 bool HasAVX512Save = true; 1692 #else 1693 // AVX512 requires additional context to be saved by the OS. 1694 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1695 #endif 1696 // AMX requires additional context to be saved by the OS. 1697 const unsigned AMXBits = (1 << 17) | (1 << 18); 1698 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1699 1700 Features["avx"] = HasAVXSave; 1701 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1702 // Only enable XSAVE if OS has enabled support for saving YMM state. 1703 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1704 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1705 1706 unsigned MaxExtLevel; 1707 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1708 1709 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1710 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1711 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1712 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1713 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1714 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1715 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1716 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1717 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1718 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1719 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1720 1721 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1722 1723 // Miscellaneous memory related features, detected by 1724 // 
using the 0x80000008 leaf of the CPUID instruction 1725 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1726 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1727 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1728 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1); 1729 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1730 1731 bool HasLeaf7 = 1732 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1733 1734 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1735 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1736 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1737 // AVX2 is only supported if we have the OS save support from AVX. 1738 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1739 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1740 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1741 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1742 // AVX512 is only supported if the OS supports the context save for it. 1743 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1744 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1745 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1746 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1747 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1748 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1749 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1750 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1751 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1752 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1753 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1754 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1755 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1756 1757 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1758 Features["avx512vbmi"] = HasLeaf7 && 
((ECX >> 1) & 1) && HasAVX512Save;
  Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
  Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
  Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
  Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
  Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
  Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
  Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
  Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
  Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
  Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
  Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
  Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
  Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
  Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);

  Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
  Features["avx512vp2intersect"] =
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
  Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
  Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which provide information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
  Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
  Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
  Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  // return all 0s for invalid subleaves so check the limit.
  bool HasLeaf7Subleaf1 =
      HasLeaf7 && EAX >= 1 &&
      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  // From here on EAX/EDX hold the leaf 7 subleaf 1 results when
  // HasLeaf7Subleaf1 is true.
  Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
  Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
  Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
  Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
  Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
  Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
  Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
  Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
  Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
  Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
  Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
  Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
  Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
  Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
  Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
  Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);

  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
  Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
  Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;

  bool HasLeaf14 = MaxLevel >= 0x14 &&
                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);

  bool HasLeaf19 =
      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  // widekl additionally requires leaf 7 to have been readable.
  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);

  bool HasLeaf24 =
      MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
  // The 512-bit variant is only reported when the 256-bit one was detected.
  Features["avx10.1-512"] =
      Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);

  return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
/// Linux ARM/AArch64 feature detection based on the "Features" line of
/// /proc/cpuinfo.
/// \returns false when /proc/cpuinfo cannot be read; true otherwise.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  if (!P)
    return false;

  SmallVector<StringRef, 32> Lines;
  P->getBuffer().split(Lines, "\n");

  SmallVector<StringRef, 32> CPUFeatures;

  // Look for the CPU features.
1855 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1856 if (Lines[I].starts_with("Features")) { 1857 Lines[I].split(CPUFeatures, ' '); 1858 break; 1859 } 1860 1861 #if defined(__aarch64__) 1862 // Keep track of which crypto features we have seen 1863 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1864 uint32_t crypto = 0; 1865 #endif 1866 1867 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1868 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1869 #if defined(__aarch64__) 1870 .Case("asimd", "neon") 1871 .Case("fp", "fp-armv8") 1872 .Case("crc32", "crc") 1873 .Case("atomics", "lse") 1874 .Case("sve", "sve") 1875 .Case("sve2", "sve2") 1876 #else 1877 .Case("half", "fp16") 1878 .Case("neon", "neon") 1879 .Case("vfpv3", "vfp3") 1880 .Case("vfpv3d16", "vfp3d16") 1881 .Case("vfpv4", "vfp4") 1882 .Case("idiva", "hwdiv-arm") 1883 .Case("idivt", "hwdiv") 1884 #endif 1885 .Default(""); 1886 1887 #if defined(__aarch64__) 1888 // We need to check crypto separately since we need all of the crypto 1889 // extensions to enable the subtarget feature 1890 if (CPUFeatures[I] == "aes") 1891 crypto |= CAP_AES; 1892 else if (CPUFeatures[I] == "pmull") 1893 crypto |= CAP_PMULL; 1894 else if (CPUFeatures[I] == "sha1") 1895 crypto |= CAP_SHA1; 1896 else if (CPUFeatures[I] == "sha2") 1897 crypto |= CAP_SHA2; 1898 #endif 1899 1900 if (LLVMFeatureStr != "") 1901 Features[LLVMFeatureStr] = true; 1902 } 1903 1904 #if defined(__aarch64__) 1905 // If we have all crypto bits we can add the feature 1906 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1907 Features["crypto"] = true; 1908 #endif 1909 1910 return true; 1911 } 1912 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1913 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1914 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1915 Features["neon"] = true; 1916 if 
(IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1917 Features["crc"] = true; 1918 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1919 Features["crypto"] = true; 1920 1921 return true; 1922 } 1923 #elif defined(__linux__) && defined(__loongarch__) 1924 #include <sys/auxv.h> 1925 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1926 unsigned long hwcap = getauxval(AT_HWCAP); 1927 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU 1928 uint32_t cpucfg2 = 0x2; 1929 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); 1930 1931 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP 1932 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP 1933 1934 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX 1935 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX 1936 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ 1937 1938 return true; 1939 } 1940 #else 1941 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1942 #endif 1943 1944 #if __APPLE__ 1945 /// \returns the \p triple, but with the Host's arch spliced in. 
1946 static Triple withHostArch(Triple T) { 1947 #if defined(__arm__) 1948 T.setArch(Triple::arm); 1949 T.setArchName("arm"); 1950 #elif defined(__arm64e__) 1951 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); 1952 T.setArchName("arm64e"); 1953 #elif defined(__aarch64__) 1954 T.setArch(Triple::aarch64); 1955 T.setArchName("arm64"); 1956 #elif defined(__x86_64h__) 1957 T.setArch(Triple::x86_64); 1958 T.setArchName("x86_64h"); 1959 #elif defined(__x86_64__) 1960 T.setArch(Triple::x86_64); 1961 T.setArchName("x86_64"); 1962 #elif defined(__i386__) 1963 T.setArch(Triple::x86); 1964 T.setArchName("i386"); 1965 #elif defined(__powerpc__) 1966 T.setArch(Triple::ppc); 1967 T.setArchName("powerpc"); 1968 #else 1969 # error "Unimplemented host arch fixup" 1970 #endif 1971 return T; 1972 } 1973 #endif 1974 1975 std::string sys::getProcessTriple() { 1976 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1977 Triple PT(Triple::normalize(TargetTripleString)); 1978 1979 #if __APPLE__ 1980 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of 1981 /// the slices. This fixes that up. 1982 PT = withHostArch(PT); 1983 #endif 1984 1985 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1986 PT = PT.get64BitArchVariant(); 1987 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1988 PT = PT.get32BitArchVariant(); 1989 1990 return PT.str(); 1991 } 1992 1993 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { 1994 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1995 std::string CPU = std::string(sys::getHostCPUName()); 1996 if (CPU == "generic") 1997 CPU = "(unknown)"; 1998 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' 1999 << " Host CPU: " << CPU << '\n'; 2000 #endif 2001 } 2002