1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 2 3// Package cpuid provides information about the CPU running the current program. 4// 5// CPU features are detected on startup, and kept for fast access through the life of the application. 6// Currently x86 / x64 (AMD64) as well as arm64 is supported. 7// 8// You can access the CPU information by accessing the shared CPU variable of the cpuid library. 9// 10// Package home: https://github.com/klauspost/cpuid 11package cpuid 12 13import ( 14 "flag" 15 "fmt" 16 "math" 17 "os" 18 "runtime" 19 "strings" 20) 21 22// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf 23// and Processor Programming Reference (PPR) 24 25// Vendor is a representation of a CPU vendor. 26type Vendor int 27 28const ( 29 VendorUnknown Vendor = iota 30 Intel 31 AMD 32 VIA 33 Transmeta 34 NSC 35 KVM // Kernel-based Virtual Machine 36 MSVM // Microsoft Hyper-V or Windows Virtual PC 37 VMware 38 XenHVM 39 Bhyve 40 Hygon 41 SiS 42 RDC 43 44 Ampere 45 ARM 46 Broadcom 47 Cavium 48 DEC 49 Fujitsu 50 Infineon 51 Motorola 52 NVIDIA 53 AMCC 54 Qualcomm 55 Marvell 56 57 lastVendor 58) 59 60//go:generate stringer -type=FeatureID,Vendor 61 62// FeatureID is the ID of a specific cpu feature. 63type FeatureID int 64 65const ( 66 // Keep index -1 as unknown 67 UNKNOWN = -1 68 69 // Add features 70 ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 71 AESNI // Advanced Encryption Standard New Instructions 72 AMD3DNOW // AMD 3DNOW 73 AMD3DNOWEXT // AMD 3DNowExt 74 AMXBF16 // Tile computational operations on BFLOAT16 numbers 75 AMXINT8 // Tile computational operations on 8-bit integers 76 AMXTILE // Tile architecture 77 AVX // AVX functions 78 AVX2 // AVX2 functions 79 AVX512BF16 // AVX-512 BFLOAT16 Instructions 80 AVX512BITALG // AVX-512 Bit Algorithms 81 AVX512BW // AVX-512 Byte and Word Instructions 82 AVX512CD // AVX-512 Conflict Detection Instructions 83 AVX512DQ // AVX-512 Doubleword and Quadword Instructions 84 AVX512ER // AVX-512 Exponential and Reciprocal Instructions 85 AVX512F // AVX-512 Foundation 86 AVX512FP16 // AVX-512 FP16 Instructions 87 AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions 88 AVX512PF // AVX-512 Prefetch Instructions 89 AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions 90 AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 91 AVX512VL // AVX-512 Vector Length Extensions 92 AVX512VNNI // AVX-512 Vector Neural Network Instructions 93 AVX512VP2INTERSECT // AVX-512 Intersect for D/Q 94 AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword 95 AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one. 96 BMI1 // Bit Manipulation Instruction Set 1 97 BMI2 // Bit Manipulation Instruction Set 2 98 CLDEMOTE // Cache Line Demote 99 CLMUL // Carry-less Multiplication 100 CLZERO // CLZERO instruction supported 101 CMOV // i686 CMOV 102 CPBOOST // Core Performance Boost 103 CX16 // CMPXCHG16B Instruction 104 ENQCMD // Enqueue Command 105 ERMS // Enhanced REP MOVSB/STOSB 106 F16C // Half-precision floating-point conversion 107 FMA3 // Intel FMA 3. Does not imply AVX. 108 FMA4 // Bulldozer FMA4 functions 109 GFNI // Galois Field New Instructions 110 HLE // Hardware Lock Elision 111 HTT // Hyperthreading (enabled) 112 HWA // Hardware assert supported. Indicates support for MSRC001_10 113 HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors 114 IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) 115 IBS // Instruction Based Sampling (AMD) 116 IBSBRNTRGT // Instruction Based Sampling Feature (AMD) 117 IBSFETCHSAM // Instruction Based Sampling Feature (AMD) 118 IBSFFV // Instruction Based Sampling Feature (AMD) 119 IBSOPCNT // Instruction Based Sampling Feature (AMD) 120 IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) 121 IBSOPSAM // Instruction Based Sampling Feature (AMD) 122 IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) 123 IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) 124 INT_WBINVD // WBINVD/WBNOINVD are interruptible. 125 INVLPGB // NVLPGB and TLBSYNC instruction supported 126 LZCNT // LZCNT instruction 127 MCAOVERFLOW // MCA overflow recovery support. 128 MCOMMIT // MCOMMIT instruction supported 129 MMX // standard MMX 130 MMXEXT // SSE integer functions or AMD MMX ext 131 MOVDIR64B // Move 64 Bytes as Direct Store 132 MOVDIRI // Move Doubleword as Direct Store 133 MPX // Intel MPX (Memory Protection Extensions) 134 MSRIRC // Instruction Retired Counter MSR available 135 NX // NX (No-Execute) bit 136 POPCNT // POPCNT instruction 137 RDPRU // RDPRU instruction supported 138 RDRAND // RDRAND instruction is available 139 RDSEED // RDSEED instruction is available 140 RDTSCP // RDTSCP Instruction 141 RTM // Restricted Transactional Memory 142 RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. 143 SERIALIZE // Serialize Instruction Execution 144 SGX // Software Guard Extensions 145 SGXLC // Software Guard Extensions Launch Control 146 SHA // Intel SHA Extensions 147 SSE // SSE functions 148 SSE2 // P4 SSE functions 149 SSE3 // Prescott SSE3 functions 150 SSE4 // Penryn SSE4.1 functions 151 SSE42 // Nehalem SSE4.2 functions 152 SSE4A // AMD Barcelona microarchitecture SSE4a instructions 153 SSSE3 // Conroe SSSE3 functions 154 STIBP // Single Thread Indirect Branch Predictors 155 SUCCOR // Software uncorrectable error containment and recovery capability. 156 TBM // AMD Trailing Bit Manipulation 157 TSXLDTRK // Intel TSX Suspend Load Address Tracking 158 VAES // Vector AES 159 VMX // Virtual Machine Extensions 160 VPCLMULQDQ // Carry-Less Multiplication Quadword 161 WAITPKG // TPAUSE, UMONITOR, UMWAIT 162 WBNOINVD // Write Back and Do Not Invalidate Cache 163 XOP // Bulldozer XOP functions 164 165 // ARM features: 166 AESARM // AES instructions 167 ARMCPUID // Some CPU ID registers readable at user-level 168 ASIMD // Advanced SIMD 169 ASIMDDP // SIMD Dot Product 170 ASIMDHP // Advanced SIMD half-precision floating point 171 ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) 172 ATOMICS // Large System Extensions (LSE) 173 CRC32 // CRC32/CRC32C instructions 174 DCPOP // Data cache clean to Point of Persistence (DC CVAP) 175 EVTSTRM // Generic timer 176 FCMA // Floatin point complex number addition and multiplication 177 FP // Single-precision and double-precision floating point 178 FPHP // Half-precision floating point 179 GPA // Generic Pointer Authentication 180 JSCVT // Javascript-style double->int convert (FJCVTZS) 181 LRCPC // Weaker release consistency (LDAPR, etc) 182 PMULL // Polynomial Multiply instructions (PMULL/PMULL2) 183 SHA1 // SHA-1 instructions (SHA1C, etc) 184 SHA2 // SHA-2 instructions (SHA256H, etc) 185 SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) 186 SHA512 // SHA512 instructions 187 SM3 // SM3 instructions 188 SM4 // SM4 instructions 189 SVE // Scalable Vector Extension 190 191 // Keep it last. It automatically defines the size of []flagSet 192 lastID 193 194 firstID FeatureID = UNKNOWN + 1 195) 196 197// CPUInfo contains information about the detected system CPU. 198type CPUInfo struct { 199 BrandName string // Brand name reported by the CPU 200 VendorID Vendor // Comparable CPU vendor ID 201 VendorString string // Raw vendor string. 202 featureSet flagSet // Features of the CPU 203 PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. 204 ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. 205 LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. 206 Family int // CPU family number 207 Model int // CPU model number 208 CacheLine int // Cache line size in bytes. Will be 0 if undetectable. 209 Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. 210 BoostFreq int64 // Max clock speed, if known, 0 otherwise 211 Cache struct { 212 L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected 213 L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected 214 L2 int // L2 Cache (per core or shared). Will be -1 if undetected 215 L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected 216 } 217 SGX SGXSupport 218 maxFunc uint32 219 maxExFunc uint32 220} 221 222var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) 223var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) 224var xgetbv func(index uint32) (eax, edx uint32) 225var rdtscpAsm func() (eax, ebx, ecx, edx uint32) 226var darwinHasAVX512 = func() bool { return false } 227 228// CPU contains information about the CPU as detected on startup, 229// or when Detect last was called. 230// 231// Use this as the primary entry point to you data. 232var CPU CPUInfo 233 234func init() { 235 initCPU() 236 Detect() 237} 238 239// Detect will re-detect current CPU info. 240// This will replace the content of the exported CPU variable. 241// 242// Unless you expect the CPU to change while you are running your program 243// you should not need to call this function. 244// If you call this, you must ensure that no other goroutine is accessing the 245// exported CPU variable. 246func Detect() { 247 // Set defaults 248 CPU.ThreadsPerCore = 1 249 CPU.Cache.L1I = -1 250 CPU.Cache.L1D = -1 251 CPU.Cache.L2 = -1 252 CPU.Cache.L3 = -1 253 safe := true 254 if detectArmFlag != nil { 255 safe = !*detectArmFlag 256 } 257 addInfo(&CPU, safe) 258 if displayFeats != nil && *displayFeats { 259 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) 260 // Exit with non-zero so tests will print value. 261 os.Exit(1) 262 } 263 if disableFlag != nil { 264 s := strings.Split(*disableFlag, ",") 265 for _, feat := range s { 266 feat := ParseFeature(strings.TrimSpace(feat)) 267 if feat != UNKNOWN { 268 CPU.featureSet.unset(feat) 269 } 270 } 271 } 272} 273 274// DetectARM will detect ARM64 features. 275// This is NOT done automatically since it can potentially crash 276// if the OS does not handle the command. 277// If in the future this can be done safely this function may not 278// do anything. 279func DetectARM() { 280 addInfo(&CPU, false) 281} 282 283var detectArmFlag *bool 284var displayFeats *bool 285var disableFlag *string 286 287// Flags will enable flags. 288// This must be called *before* flag.Parse AND 289// Detect must be called after the flags have been parsed. 290// Note that this means that any detection used in init() functions 291// will not contain these flags. 292func Flags() { 293 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") 294 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") 295 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") 296} 297 298// Supports returns whether the CPU supports all of the requested features. 299func (c CPUInfo) Supports(ids ...FeatureID) bool { 300 for _, id := range ids { 301 if !c.featureSet.inSet(id) { 302 return false 303 } 304 } 305 return true 306} 307 308// Has allows for checking a single feature. 309// Should be inlined by the compiler. 310func (c CPUInfo) Has(id FeatureID) bool { 311 return c.featureSet.inSet(id) 312} 313 314// Disable will disable one or several features. 315func (c *CPUInfo) Disable(ids ...FeatureID) bool { 316 for _, id := range ids { 317 c.featureSet.unset(id) 318 } 319 return true 320} 321 322// Enable will disable one or several features even if they were undetected. 323// This is of course not recommended for obvious reasons. 324func (c *CPUInfo) Enable(ids ...FeatureID) bool { 325 for _, id := range ids { 326 c.featureSet.set(id) 327 } 328 return true 329} 330 331// IsVendor returns true if vendor is recognized as Intel 332func (c CPUInfo) IsVendor(v Vendor) bool { 333 return c.VendorID == v 334} 335 336func (c CPUInfo) FeatureSet() []string { 337 s := make([]string, 0) 338 for _, f := range c.featureSet.Strings() { 339 s = append(s, f) 340 } 341 return s 342} 343 344// RTCounter returns the 64-bit time-stamp counter 345// Uses the RDTSCP instruction. The value 0 is returned 346// if the CPU does not support the instruction. 347func (c CPUInfo) RTCounter() uint64 { 348 if !c.Supports(RDTSCP) { 349 return 0 350 } 351 a, _, _, d := rdtscpAsm() 352 return uint64(a) | (uint64(d) << 32) 353} 354 355// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 356// This variable is OS dependent, but on Linux contains information 357// about the current cpu/core the code is running on. 358// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 359func (c CPUInfo) Ia32TscAux() uint32 { 360 if !c.Supports(RDTSCP) { 361 return 0 362 } 363 _, _, ecx, _ := rdtscpAsm() 364 return ecx 365} 366 367// LogicalCPU will return the Logical CPU the code is currently executing on. 368// This is likely to change when the OS re-schedules the running thread 369// to another CPU. 370// If the current core cannot be detected, -1 will be returned. 371func (c CPUInfo) LogicalCPU() int { 372 if c.maxFunc < 1 { 373 return -1 374 } 375 _, ebx, _, _ := cpuid(1) 376 return int(ebx >> 24) 377} 378 379// frequencies tries to compute the clock speed of the CPU. If leaf 15 is 380// supported, use it, otherwise parse the brand string. Yes, really. 381func (c *CPUInfo) frequencies() { 382 c.Hz, c.BoostFreq = 0, 0 383 mfi := maxFunctionID() 384 if mfi >= 0x15 { 385 eax, ebx, ecx, _ := cpuid(0x15) 386 if eax != 0 && ebx != 0 && ecx != 0 { 387 c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) 388 } 389 } 390 if mfi >= 0x16 { 391 a, b, _, _ := cpuid(0x16) 392 // Base... 393 if a&0xffff > 0 { 394 c.Hz = int64(a&0xffff) * 1_000_000 395 } 396 // Boost... 397 if b&0xffff > 0 { 398 c.BoostFreq = int64(b&0xffff) * 1_000_000 399 } 400 } 401 if c.Hz > 0 { 402 return 403 } 404 405 // computeHz determines the official rated speed of a CPU from its brand 406 // string. This insanity is *actually the official documented way to do 407 // this according to Intel*, prior to leaf 0x15 existing. The official 408 // documentation only shows this working for exactly `x.xx` or `xxxx` 409 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other 410 // sizes. 411 model := c.BrandName 412 hz := strings.LastIndex(model, "Hz") 413 if hz < 3 { 414 return 415 } 416 var multiplier int64 417 switch model[hz-1] { 418 case 'M': 419 multiplier = 1000 * 1000 420 case 'G': 421 multiplier = 1000 * 1000 * 1000 422 case 'T': 423 multiplier = 1000 * 1000 * 1000 * 1000 424 } 425 if multiplier == 0 { 426 return 427 } 428 freq := int64(0) 429 divisor := int64(0) 430 decimalShift := int64(1) 431 var i int 432 for i = hz - 2; i >= 0 && model[i] != ' '; i-- { 433 if model[i] >= '0' && model[i] <= '9' { 434 freq += int64(model[i]-'0') * decimalShift 435 decimalShift *= 10 436 } else if model[i] == '.' { 437 if divisor != 0 { 438 return 439 } 440 divisor = decimalShift 441 } else { 442 return 443 } 444 } 445 // we didn't find a space 446 if i < 0 { 447 return 448 } 449 if divisor != 0 { 450 c.Hz = (freq * multiplier) / divisor 451 return 452 } 453 c.Hz = freq * multiplier 454} 455 456// VM Will return true if the cpu id indicates we are in 457// a virtual machine. 458func (c CPUInfo) VM() bool { 459 return CPU.featureSet.inSet(HYPERVISOR) 460} 461 462// flags contains detected cpu features and characteristics 463type flags uint64 464 465// log2(bits_in_uint64) 466const flagBitsLog2 = 6 467const flagBits = 1 << flagBitsLog2 468const flagMask = flagBits - 1 469 470// flagSet contains detected cpu features and characteristics in an array of flags 471type flagSet [(lastID + flagMask) / flagBits]flags 472 473func (s flagSet) inSet(feat FeatureID) bool { 474 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 475} 476 477func (s *flagSet) set(feat FeatureID) { 478 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) 479} 480 481// setIf will set a feature if boolean is true. 482func (s *flagSet) setIf(cond bool, features ...FeatureID) { 483 if cond { 484 for _, offset := range features { 485 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) 486 } 487 } 488} 489 490func (s *flagSet) unset(offset FeatureID) { 491 bit := flags(1 << (offset & flagMask)) 492 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit 493} 494 495// or with another flagset. 496func (s *flagSet) or(other flagSet) { 497 for i, v := range other[:] { 498 s[i] |= v 499 } 500} 501 502// ParseFeature will parse the string and return the ID of the matching feature. 503// Will return UNKNOWN if not found. 504func ParseFeature(s string) FeatureID { 505 s = strings.ToUpper(s) 506 for i := firstID; i < lastID; i++ { 507 if i.String() == s { 508 return i 509 } 510 } 511 return UNKNOWN 512} 513 514// Strings returns an array of the detected features for FlagsSet. 515func (s flagSet) Strings() []string { 516 if len(s) == 0 { 517 return []string{""} 518 } 519 r := make([]string, 0) 520 for i := firstID; i < lastID; i++ { 521 if s.inSet(i) { 522 r = append(r, i.String()) 523 } 524 } 525 return r 526} 527 528func maxExtendedFunction() uint32 { 529 eax, _, _, _ := cpuid(0x80000000) 530 return eax 531} 532 533func maxFunctionID() uint32 { 534 a, _, _, _ := cpuid(0) 535 return a 536} 537 538func brandName() string { 539 if maxExtendedFunction() >= 0x80000004 { 540 v := make([]uint32, 0, 48) 541 for i := uint32(0); i < 3; i++ { 542 a, b, c, d := cpuid(0x80000002 + i) 543 v = append(v, a, b, c, d) 544 } 545 return strings.Trim(string(valAsString(v...)), " ") 546 } 547 return "unknown" 548} 549 550func threadsPerCore() int { 551 mfi := maxFunctionID() 552 vend, _ := vendorID() 553 554 if mfi < 0x4 || (vend != Intel && vend != AMD) { 555 return 1 556 } 557 558 if mfi < 0xb { 559 if vend != Intel { 560 return 1 561 } 562 _, b, _, d := cpuid(1) 563 if (d & (1 << 28)) != 0 { 564 // v will contain logical core count 565 v := (b >> 16) & 255 566 if v > 1 { 567 a4, _, _, _ := cpuid(4) 568 // physical cores 569 v2 := (a4 >> 26) + 1 570 if v2 > 0 { 571 return int(v) / int(v2) 572 } 573 } 574 } 575 return 1 576 } 577 _, b, _, _ := cpuidex(0xb, 0) 578 if b&0xffff == 0 { 579 if vend == AMD { 580 // Workaround for AMD returning 0, assume 2 if >= Zen 2 581 // It will be more correct than not. 582 fam, _ := familyModel() 583 _, _, _, d := cpuid(1) 584 if (d&(1<<28)) != 0 && fam >= 23 { 585 return 2 586 } 587 } 588 return 1 589 } 590 return int(b & 0xffff) 591} 592 593func logicalCores() int { 594 mfi := maxFunctionID() 595 v, _ := vendorID() 596 switch v { 597 case Intel: 598 // Use this on old Intel processors 599 if mfi < 0xb { 600 if mfi < 1 { 601 return 0 602 } 603 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 604 // that can be assigned to logical processors in a physical package. 605 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 606 _, ebx, _, _ := cpuid(1) 607 logical := (ebx >> 16) & 0xff 608 return int(logical) 609 } 610 _, b, _, _ := cpuidex(0xb, 1) 611 return int(b & 0xffff) 612 case AMD, Hygon: 613 _, b, _, _ := cpuid(1) 614 return int((b >> 16) & 0xff) 615 default: 616 return 0 617 } 618} 619 620func familyModel() (int, int) { 621 if maxFunctionID() < 0x1 { 622 return 0, 0 623 } 624 eax, _, _, _ := cpuid(1) 625 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 626 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 627 return int(family), int(model) 628} 629 630func physicalCores() int { 631 v, _ := vendorID() 632 switch v { 633 case Intel: 634 return logicalCores() / threadsPerCore() 635 case AMD, Hygon: 636 lc := logicalCores() 637 tpc := threadsPerCore() 638 if lc > 0 && tpc > 0 { 639 return lc / tpc 640 } 641 642 // The following is inaccurate on AMD EPYC 7742 64-Core Processor 643 if maxExtendedFunction() >= 0x80000008 { 644 _, _, c, _ := cpuid(0x80000008) 645 if c&0xff > 0 { 646 return int(c&0xff) + 1 647 } 648 } 649 } 650 return 0 651} 652 653// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 654var vendorMapping = map[string]Vendor{ 655 "AMDisbetter!": AMD, 656 "AuthenticAMD": AMD, 657 "CentaurHauls": VIA, 658 "GenuineIntel": Intel, 659 "TransmetaCPU": Transmeta, 660 "GenuineTMx86": Transmeta, 661 "Geode by NSC": NSC, 662 "VIA VIA VIA ": VIA, 663 "KVMKVMKVMKVM": KVM, 664 "Microsoft Hv": MSVM, 665 "VMwareVMware": VMware, 666 "XenVMMXenVMM": XenHVM, 667 "bhyve bhyve ": Bhyve, 668 "HygonGenuine": Hygon, 669 "Vortex86 SoC": SiS, 670 "SiS SiS SiS ": SiS, 671 "RiseRiseRise": SiS, 672 "Genuine RDC": RDC, 673} 674 675func vendorID() (Vendor, string) { 676 _, b, c, d := cpuid(0) 677 v := string(valAsString(b, d, c)) 678 vend, ok := vendorMapping[v] 679 if !ok { 680 return VendorUnknown, v 681 } 682 return vend, v 683} 684 685func cacheLine() int { 686 if maxFunctionID() < 0x1 { 687 return 0 688 } 689 690 _, ebx, _, _ := cpuid(1) 691 cache := (ebx & 0xff00) >> 5 // cflush size 692 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 693 _, _, ecx, _ := cpuid(0x80000006) 694 cache = ecx & 0xff // cacheline size 695 } 696 // TODO: Read from Cache and TLB Information 697 return int(cache) 698} 699 700func (c *CPUInfo) cacheSize() { 701 c.Cache.L1D = -1 702 c.Cache.L1I = -1 703 c.Cache.L2 = -1 704 c.Cache.L3 = -1 705 vendor, _ := vendorID() 706 switch vendor { 707 case Intel: 708 if maxFunctionID() < 4 { 709 return 710 } 711 for i := uint32(0); ; i++ { 712 eax, ebx, ecx, _ := cpuidex(4, i) 713 cacheType := eax & 15 714 if cacheType == 0 { 715 break 716 } 717 cacheLevel := (eax >> 5) & 7 718 coherency := int(ebx&0xfff) + 1 719 partitions := int((ebx>>12)&0x3ff) + 1 720 associativity := int((ebx>>22)&0x3ff) + 1 721 sets := int(ecx) + 1 722 size := associativity * partitions * coherency * sets 723 switch cacheLevel { 724 case 1: 725 if cacheType == 1 { 726 // 1 = Data Cache 727 c.Cache.L1D = size 728 } else if cacheType == 2 { 729 // 2 = Instruction Cache 730 c.Cache.L1I = size 731 } else { 732 if c.Cache.L1D < 0 { 733 c.Cache.L1I = size 734 } 735 if c.Cache.L1I < 0 { 736 c.Cache.L1I = size 737 } 738 } 739 case 2: 740 c.Cache.L2 = size 741 case 3: 742 c.Cache.L3 = size 743 } 744 } 745 case AMD, Hygon: 746 // Untested. 747 if maxExtendedFunction() < 0x80000005 { 748 return 749 } 750 _, _, ecx, edx := cpuid(0x80000005) 751 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 752 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 753 754 if maxExtendedFunction() < 0x80000006 { 755 return 756 } 757 _, _, ecx, _ = cpuid(0x80000006) 758 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 759 760 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties 761 if maxExtendedFunction() < 0x8000001D { 762 return 763 } 764 for i := uint32(0); i < math.MaxUint32; i++ { 765 eax, ebx, ecx, _ := cpuidex(0x8000001D, i) 766 767 level := (eax >> 5) & 7 768 cacheNumSets := ecx + 1 769 cacheLineSize := 1 + (ebx & 2047) 770 cachePhysPartitions := 1 + ((ebx >> 12) & 511) 771 cacheNumWays := 1 + ((ebx >> 22) & 511) 772 773 typ := eax & 15 774 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) 775 if typ == 0 { 776 return 777 } 778 779 switch level { 780 case 1: 781 switch typ { 782 case 1: 783 // Data cache 784 c.Cache.L1D = size 785 case 2: 786 // Inst cache 787 c.Cache.L1I = size 788 default: 789 if c.Cache.L1D < 0 { 790 c.Cache.L1I = size 791 } 792 if c.Cache.L1I < 0 { 793 c.Cache.L1I = size 794 } 795 } 796 case 2: 797 c.Cache.L2 = size 798 case 3: 799 c.Cache.L3 = size 800 } 801 } 802 } 803 804 return 805} 806 807type SGXEPCSection struct { 808 BaseAddress uint64 809 EPCSize uint64 810} 811 812type SGXSupport struct { 813 Available bool 814 LaunchControl bool 815 SGX1Supported bool 816 SGX2Supported bool 817 MaxEnclaveSizeNot64 int64 818 MaxEnclaveSize64 int64 819 EPCSections []SGXEPCSection 820} 821 822func hasSGX(available, lc bool) (rval SGXSupport) { 823 rval.Available = available 824 825 if !available { 826 return 827 } 828 829 rval.LaunchControl = lc 830 831 a, _, _, d := cpuidex(0x12, 0) 832 rval.SGX1Supported = a&0x01 != 0 833 rval.SGX2Supported = a&0x02 != 0 834 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 835 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 836 rval.EPCSections = make([]SGXEPCSection, 0) 837 838 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 839 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 840 leafType := eax & 0xf 841 842 if leafType == 0 { 843 // Invalid subleaf, stop iterating 844 break 845 } else if leafType == 1 { 846 // EPC Section subleaf 847 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 848 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 849 850 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} 851 rval.EPCSections = append(rval.EPCSections, section) 852 } 853 } 854 855 return 856} 857 858func support() flagSet { 859 var fs flagSet 860 mfi := maxFunctionID() 861 vend, _ := vendorID() 862 if mfi < 0x1 { 863 return fs 864 } 865 family, model := familyModel() 866 867 _, _, c, d := cpuid(1) 868 fs.setIf((d&(1<<15)) != 0, CMOV) 869 fs.setIf((d&(1<<23)) != 0, MMX) 870 fs.setIf((d&(1<<25)) != 0, MMXEXT) 871 fs.setIf((d&(1<<25)) != 0, SSE) 872 fs.setIf((d&(1<<26)) != 0, SSE2) 873 fs.setIf((c&1) != 0, SSE3) 874 fs.setIf((c&(1<<5)) != 0, VMX) 875 fs.setIf((c&0x00000200) != 0, SSSE3) 876 fs.setIf((c&0x00080000) != 0, SSE4) 877 fs.setIf((c&0x00100000) != 0, SSE42) 878 fs.setIf((c&(1<<25)) != 0, AESNI) 879 fs.setIf((c&(1<<1)) != 0, CLMUL) 880 fs.setIf(c&(1<<23) != 0, POPCNT) 881 fs.setIf(c&(1<<30) != 0, RDRAND) 882 883 // This bit has been reserved by Intel & AMD for use by hypervisors, 884 // and indicates the presence of a hypervisor. 885 fs.setIf(c&(1<<31) != 0, HYPERVISOR) 886 fs.setIf(c&(1<<29) != 0, F16C) 887 fs.setIf(c&(1<<13) != 0, CX16) 888 889 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 890 fs.setIf(threadsPerCore() > 1, HTT) 891 } 892 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { 893 fs.setIf(threadsPerCore() > 1, HTT) 894 } 895 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits 896 const avxCheck = 1<<26 | 1<<27 | 1<<28 897 if c&avxCheck == avxCheck { 898 // Check for OS support 899 eax, _ := xgetbv(0) 900 if (eax & 0x6) == 0x6 { 901 fs.set(AVX) 902 switch vend { 903 case Intel: 904 // Older than Haswell. 905 fs.setIf(family == 6 && model < 60, AVXSLOW) 906 case AMD: 907 // Older than Zen 2 908 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) 909 } 910 } 911 } 912 // FMA3 can be used with SSE registers, so no OS support is strictly needed. 913 // fma3 and OSXSAVE needed. 914 const fma3Check = 1<<12 | 1<<27 915 fs.setIf(c&fma3Check == fma3Check, FMA3) 916 917 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 918 if mfi >= 7 { 919 _, ebx, ecx, edx := cpuidex(7, 0) 920 eax1, _, _, _ := cpuidex(7, 1) 921 if fs.inSet(AVX) && (ebx&0x00000020) != 0 { 922 fs.set(AVX2) 923 } 924 // CPUID.(EAX=7, ECX=0).EBX 925 if (ebx & 0x00000008) != 0 { 926 fs.set(BMI1) 927 fs.setIf((ebx&0x00000100) != 0, BMI2) 928 } 929 fs.setIf(ebx&(1<<2) != 0, SGX) 930 fs.setIf(ebx&(1<<4) != 0, HLE) 931 fs.setIf(ebx&(1<<9) != 0, ERMS) 932 fs.setIf(ebx&(1<<11) != 0, RTM) 933 fs.setIf(ebx&(1<<14) != 0, MPX) 934 fs.setIf(ebx&(1<<18) != 0, RDSEED) 935 fs.setIf(ebx&(1<<19) != 0, ADX) 936 fs.setIf(ebx&(1<<29) != 0, SHA) 937 // CPUID.(EAX=7, ECX=0).ECX 938 fs.setIf(ecx&(1<<5) != 0, WAITPKG) 939 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) 940 fs.setIf(ecx&(1<<27) != 0, MOVDIRI) 941 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) 942 fs.setIf(ecx&(1<<29) != 0, ENQCMD) 943 fs.setIf(ecx&(1<<30) != 0, SGXLC) 944 // CPUID.(EAX=7, ECX=0).EDX 945 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) 946 fs.setIf(edx&(1<<14) != 0, SERIALIZE) 947 fs.setIf(edx&(1<<16) != 0, TSXLDTRK) 948 fs.setIf(edx&(1<<26) != 0, IBPB) 949 fs.setIf(edx&(1<<27) != 0, STIBP) 950 951 // Only detect AVX-512 features if XGETBV is supported 952 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 953 // Check for OS support 954 eax, _ := xgetbv(0) 955 956 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 957 // ZMM16-ZMM31 state are enabled by OS) 958 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 959 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 960 if runtime.GOOS == "darwin" { 961 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() 962 } 963 if hasAVX512 { 964 fs.setIf(ebx&(1<<16) != 0, AVX512F) 965 fs.setIf(ebx&(1<<17) != 0, AVX512DQ) 966 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) 967 fs.setIf(ebx&(1<<26) != 0, AVX512PF) 968 fs.setIf(ebx&(1<<27) != 0, AVX512ER) 969 fs.setIf(ebx&(1<<28) != 0, AVX512CD) 970 fs.setIf(ebx&(1<<30) != 0, AVX512BW) 971 fs.setIf(ebx&(1<<31) != 0, AVX512VL) 972 // ecx 973 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) 974 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) 975 fs.setIf(ecx&(1<<8) != 0, GFNI) 976 fs.setIf(ecx&(1<<9) != 0, VAES) 977 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) 978 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) 979 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) 980 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) 981 // edx 982 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) 983 fs.setIf(edx&(1<<22) != 0, AMXBF16) 984 fs.setIf(edx&(1<<23) != 0, AVX512FP16) 985 fs.setIf(edx&(1<<24) != 0, AMXTILE) 986 fs.setIf(edx&(1<<25) != 0, AMXINT8) 987 // eax1 = CPUID.(EAX=7, ECX=1).EAX 988 fs.setIf(eax1&(1<<5) != 0, AVX512BF16) 989 } 990 } 991 } 992 993 if maxExtendedFunction() >= 0x80000001 { 994 _, _, c, d := cpuid(0x80000001) 995 if (c & (1 << 5)) != 0 { 996 fs.set(LZCNT) 997 fs.set(POPCNT) 998 } 999 fs.setIf((c&(1<<10)) != 0, IBS) 1000 fs.setIf((d&(1<<31)) != 0, AMD3DNOW) 1001 fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) 1002 fs.setIf((d&(1<<23)) != 0, MMX) 1003 fs.setIf((d&(1<<22)) != 0, MMXEXT) 1004 fs.setIf((c&(1<<6)) != 0, SSE4A) 1005 fs.setIf(d&(1<<20) != 0, NX) 1006 fs.setIf(d&(1<<27) != 0, RDTSCP) 1007 1008 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 1009 * used unless the OS has AVX support. */ 1010 if fs.inSet(AVX) { 1011 fs.setIf((c&0x00000800) != 0, XOP) 1012 fs.setIf((c&0x00010000) != 0, FMA4) 1013 } 1014 1015 } 1016 if maxExtendedFunction() >= 0x80000007 { 1017 _, b, _, d := cpuid(0x80000007) 1018 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) 1019 fs.setIf((b&(1<<1)) != 0, SUCCOR) 1020 fs.setIf((b&(1<<2)) != 0, HWA) 1021 fs.setIf((d&(1<<9)) != 0, CPBOOST) 1022 } 1023 1024 if maxExtendedFunction() >= 0x80000008 { 1025 _, b, _, _ := cpuid(0x80000008) 1026 fs.setIf((b&(1<<9)) != 0, WBNOINVD) 1027 fs.setIf((b&(1<<8)) != 0, MCOMMIT) 1028 fs.setIf((b&(1<<13)) != 0, INT_WBINVD) 1029 fs.setIf((b&(1<<4)) != 0, RDPRU) 1030 fs.setIf((b&(1<<3)) != 0, INVLPGB) 1031 fs.setIf((b&(1<<1)) != 0, MSRIRC) 1032 fs.setIf((b&(1<<0)) != 0, CLZERO) 1033 } 1034 1035 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { 1036 eax, _, _, _ := cpuid(0x8000001b) 1037 fs.setIf((eax>>0)&1 == 1, IBSFFV) 1038 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) 1039 fs.setIf((eax>>2)&1 == 1, IBSOPSAM) 1040 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) 1041 fs.setIf((eax>>4)&1 == 1, IBSOPCNT) 1042 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) 1043 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) 1044 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) 1045 } 1046 1047 return fs 1048} 1049 1050func valAsString(values ...uint32) []byte { 1051 r := make([]byte, 4*len(values)) 1052 for i, v := range values { 1053 dst := r[i*4:] 1054 dst[0] = byte(v & 0xff) 1055 dst[1] = byte((v >> 8) & 0xff) 1056 dst[2] = byte((v >> 16) & 0xff) 1057 dst[3] = byte((v >> 24) & 0xff) 1058 switch { 1059 case dst[0] == 0: 1060 return r[:i*4] 1061 case dst[1] == 0: 1062 return r[:i*4+1] 1063 case dst[2] == 0: 1064 return r[:i*4+2] 1065 case dst[3] == 0: 1066 return r[:i*4+3] 1067 } 1068 } 1069 return r 1070} 1071