// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import (
	"flag"
	"fmt"
	"math"
	"os"
	"runtime"
	"strings"
)

// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
// and Processor Programming Reference (PPR)

// Vendor is a representation of a CPU vendor.
type Vendor int

// Known vendor identifiers. VendorUnknown is the zero value.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is a sentinel declared after the final vendor value.
	lastVendor
)

//go:generate stringer -type=FeatureID,Vendor

// FeatureID is the ID of a specific cpu feature.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one.
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CMOV                                // i686 CMOV
	CX16                                // CMPXCHG16B Instruction
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	GFNI                                // Galois Field New Instructions
	HLE                                 // Hardware Lock Elision
	HTT                                 // Hyperthreading (enabled)
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	LZCNT                               // LZCNT instruction
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MPX                                 // Intel MPX (Memory Protection Extensions)
	NX                                  // NX (No-Execute) bit
	POPCNT                              // POPCNT instruction
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RTM                                 // Restricted Transactional Memory
	SERIALIZE                           // Serialize Instruction Execution
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	TBM                                 // AMD Trailing Bit Manipulation
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	XOP                                 // Bulldozer XOP functions

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension

	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lowest value visited when iterating over feature IDs.
	firstID FeatureID = UNKNOWN + 1
)

// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName      string  // Brand name reported by the CPU
	VendorID       Vendor  // Comparable CPU vendor ID
	VendorString   string  // Raw vendor string.
	featureSet     flagSet // Features of the CPU
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int     // CPU family number
	Model          int     // CPU model number
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64   // Clock speed, if known, 0 otherwise
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX       SGXSupport
	maxFunc   uint32
	maxExFunc uint32
}

// Low-level hooks used by the detection code in this file. They are declared
// without a value here (except darwinHasAVX512, which defaults to reporting
// false); presumably initCPU installs the platform implementations — TODO confirm.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
var darwinHasAVX512 = func() bool { return false }

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo

// init calls initCPU and then runs Detect once so that CPU is populated
// when the package is loaded.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
233func Detect() { 234 // Set defaults 235 CPU.ThreadsPerCore = 1 236 CPU.Cache.L1I = -1 237 CPU.Cache.L1D = -1 238 CPU.Cache.L2 = -1 239 CPU.Cache.L3 = -1 240 safe := true 241 if detectArmFlag != nil { 242 safe = !*detectArmFlag 243 } 244 addInfo(&CPU, safe) 245 if displayFeats != nil && *displayFeats { 246 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) 247 // Exit with non-zero so tests will print value. 248 os.Exit(1) 249 } 250 if disableFlag != nil { 251 s := strings.Split(*disableFlag, ",") 252 for _, feat := range s { 253 feat := ParseFeature(strings.TrimSpace(feat)) 254 if feat != UNKNOWN { 255 CPU.featureSet.unset(feat) 256 } 257 } 258 } 259} 260 261// DetectARM will detect ARM64 features. 262// This is NOT done automatically since it can potentially crash 263// if the OS does not handle the command. 264// If in the future this can be done safely this function may not 265// do anything. 266func DetectARM() { 267 addInfo(&CPU, false) 268} 269 270var detectArmFlag *bool 271var displayFeats *bool 272var disableFlag *string 273 274// Flags will enable flags. 275// This must be called *before* flag.Parse AND 276// Detect must be called after the flags have been parsed. 277// Note that this means that any detection used in init() functions 278// will not contain these flags. 279func Flags() { 280 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") 281 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") 282 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") 283} 284 285// Supports returns whether the CPU supports all of the requested features. 286func (c CPUInfo) Supports(ids ...FeatureID) bool { 287 for _, id := range ids { 288 if !c.featureSet.inSet(id) { 289 return false 290 } 291 } 292 return true 293} 294 295// Has allows for checking a single feature. 296// Should be inlined by the compiler. 
297func (c CPUInfo) Has(id FeatureID) bool { 298 return c.featureSet.inSet(id) 299} 300 301// Disable will disable one or several features. 302func (c *CPUInfo) Disable(ids ...FeatureID) bool { 303 for _, id := range ids { 304 c.featureSet.unset(id) 305 } 306 return true 307} 308 309// Enable will disable one or several features even if they were undetected. 310// This is of course not recommended for obvious reasons. 311func (c *CPUInfo) Enable(ids ...FeatureID) bool { 312 for _, id := range ids { 313 c.featureSet.set(id) 314 } 315 return true 316} 317 318// IsVendor returns true if vendor is recognized as Intel 319func (c CPUInfo) IsVendor(v Vendor) bool { 320 return c.VendorID == v 321} 322 323func (c CPUInfo) FeatureSet() []string { 324 s := make([]string, 0) 325 for _, f := range c.featureSet.Strings() { 326 s = append(s, f) 327 } 328 return s 329} 330 331// RTCounter returns the 64-bit time-stamp counter 332// Uses the RDTSCP instruction. The value 0 is returned 333// if the CPU does not support the instruction. 334func (c CPUInfo) RTCounter() uint64 { 335 if !c.Supports(RDTSCP) { 336 return 0 337 } 338 a, _, _, d := rdtscpAsm() 339 return uint64(a) | (uint64(d) << 32) 340} 341 342// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 343// This variable is OS dependent, but on Linux contains information 344// about the current cpu/core the code is running on. 345// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 346func (c CPUInfo) Ia32TscAux() uint32 { 347 if !c.Supports(RDTSCP) { 348 return 0 349 } 350 _, _, ecx, _ := rdtscpAsm() 351 return ecx 352} 353 354// LogicalCPU will return the Logical CPU the code is currently executing on. 355// This is likely to change when the OS re-schedules the running thread 356// to another CPU. 357// If the current core cannot be detected, -1 will be returned. 
358func (c CPUInfo) LogicalCPU() int { 359 if c.maxFunc < 1 { 360 return -1 361 } 362 _, ebx, _, _ := cpuid(1) 363 return int(ebx >> 24) 364} 365 366// hertz tries to compute the clock speed of the CPU. If leaf 15 is 367// supported, use it, otherwise parse the brand string. Yes, really. 368func hertz(model string) int64 { 369 mfi := maxFunctionID() 370 if mfi >= 0x15 { 371 eax, ebx, ecx, _ := cpuid(0x15) 372 if eax != 0 && ebx != 0 && ecx != 0 { 373 return int64((int64(ecx) * int64(ebx)) / int64(eax)) 374 } 375 } 376 // computeHz determines the official rated speed of a CPU from its brand 377 // string. This insanity is *actually the official documented way to do 378 // this according to Intel*, prior to leaf 0x15 existing. The official 379 // documentation only shows this working for exactly `x.xx` or `xxxx` 380 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other 381 // sizes. 382 hz := strings.LastIndex(model, "Hz") 383 if hz < 3 { 384 return 0 385 } 386 var multiplier int64 387 switch model[hz-1] { 388 case 'M': 389 multiplier = 1000 * 1000 390 case 'G': 391 multiplier = 1000 * 1000 * 1000 392 case 'T': 393 multiplier = 1000 * 1000 * 1000 * 1000 394 } 395 if multiplier == 0 { 396 return 0 397 } 398 freq := int64(0) 399 divisor := int64(0) 400 decimalShift := int64(1) 401 var i int 402 for i = hz - 2; i >= 0 && model[i] != ' '; i-- { 403 if model[i] >= '0' && model[i] <= '9' { 404 freq += int64(model[i]-'0') * decimalShift 405 decimalShift *= 10 406 } else if model[i] == '.' { 407 if divisor != 0 { 408 return 0 409 } 410 divisor = decimalShift 411 } else { 412 return 0 413 } 414 } 415 // we didn't find a space 416 if i < 0 { 417 return 0 418 } 419 if divisor != 0 { 420 return (freq * multiplier) / divisor 421 } 422 return freq * multiplier 423} 424 425// VM Will return true if the cpu id indicates we are in 426// a virtual machine. 
427func (c CPUInfo) VM() bool { 428 return CPU.featureSet.inSet(HYPERVISOR) 429} 430 431// flags contains detected cpu features and characteristics 432type flags uint64 433 434// log2(bits_in_uint64) 435const flagBitsLog2 = 6 436const flagBits = 1 << flagBitsLog2 437const flagMask = flagBits - 1 438 439// flagSet contains detected cpu features and characteristics in an array of flags 440type flagSet [(lastID + flagMask) / flagBits]flags 441 442func (s flagSet) inSet(feat FeatureID) bool { 443 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 444} 445 446func (s *flagSet) set(feat FeatureID) { 447 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) 448} 449 450// setIf will set a feature if boolean is true. 451func (s *flagSet) setIf(cond bool, features ...FeatureID) { 452 if cond { 453 for _, offset := range features { 454 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) 455 } 456 } 457} 458 459func (s *flagSet) unset(offset FeatureID) { 460 bit := flags(1 << (offset & flagMask)) 461 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit 462} 463 464// or with another flagset. 465func (s *flagSet) or(other flagSet) { 466 for i, v := range other[:] { 467 s[i] |= v 468 } 469} 470 471// ParseFeature will parse the string and return the ID of the matching feature. 472// Will return UNKNOWN if not found. 473func ParseFeature(s string) FeatureID { 474 s = strings.ToUpper(s) 475 for i := firstID; i < lastID; i++ { 476 if i.String() == s { 477 return i 478 } 479 } 480 return UNKNOWN 481} 482 483// Strings returns an array of the detected features for FlagsSet. 
484func (s flagSet) Strings() []string { 485 if len(s) == 0 { 486 return []string{""} 487 } 488 r := make([]string, 0) 489 for i := firstID; i < lastID; i++ { 490 if s.inSet(i) { 491 r = append(r, i.String()) 492 } 493 } 494 return r 495} 496 497func maxExtendedFunction() uint32 { 498 eax, _, _, _ := cpuid(0x80000000) 499 return eax 500} 501 502func maxFunctionID() uint32 { 503 a, _, _, _ := cpuid(0) 504 return a 505} 506 507func brandName() string { 508 if maxExtendedFunction() >= 0x80000004 { 509 v := make([]uint32, 0, 48) 510 for i := uint32(0); i < 3; i++ { 511 a, b, c, d := cpuid(0x80000002 + i) 512 v = append(v, a, b, c, d) 513 } 514 return strings.Trim(string(valAsString(v...)), " ") 515 } 516 return "unknown" 517} 518 519func threadsPerCore() int { 520 mfi := maxFunctionID() 521 vend, _ := vendorID() 522 523 if mfi < 0x4 || (vend != Intel && vend != AMD) { 524 return 1 525 } 526 527 if mfi < 0xb { 528 if vend != Intel { 529 return 1 530 } 531 _, b, _, d := cpuid(1) 532 if (d & (1 << 28)) != 0 { 533 // v will contain logical core count 534 v := (b >> 16) & 255 535 if v > 1 { 536 a4, _, _, _ := cpuid(4) 537 // physical cores 538 v2 := (a4 >> 26) + 1 539 if v2 > 0 { 540 return int(v) / int(v2) 541 } 542 } 543 } 544 return 1 545 } 546 _, b, _, _ := cpuidex(0xb, 0) 547 if b&0xffff == 0 { 548 if vend == AMD { 549 // Workaround for AMD returning 0, assume 2 if >= Zen 2 550 // It will be more correct than not. 551 fam, _ := familyModel() 552 _, _, _, d := cpuid(1) 553 if (d&(1<<28)) != 0 && fam >= 23 { 554 return 2 555 } 556 } 557 return 1 558 } 559 return int(b & 0xffff) 560} 561 562func logicalCores() int { 563 mfi := maxFunctionID() 564 v, _ := vendorID() 565 switch v { 566 case Intel: 567 // Use this on old Intel processors 568 if mfi < 0xb { 569 if mfi < 1 { 570 return 0 571 } 572 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 573 // that can be assigned to logical processors in a physical package. 
574 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 575 _, ebx, _, _ := cpuid(1) 576 logical := (ebx >> 16) & 0xff 577 return int(logical) 578 } 579 _, b, _, _ := cpuidex(0xb, 1) 580 return int(b & 0xffff) 581 case AMD, Hygon: 582 _, b, _, _ := cpuid(1) 583 return int((b >> 16) & 0xff) 584 default: 585 return 0 586 } 587} 588 589func familyModel() (int, int) { 590 if maxFunctionID() < 0x1 { 591 return 0, 0 592 } 593 eax, _, _, _ := cpuid(1) 594 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 595 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 596 return int(family), int(model) 597} 598 599func physicalCores() int { 600 v, _ := vendorID() 601 switch v { 602 case Intel: 603 return logicalCores() / threadsPerCore() 604 case AMD, Hygon: 605 lc := logicalCores() 606 tpc := threadsPerCore() 607 if lc > 0 && tpc > 0 { 608 return lc / tpc 609 } 610 611 // The following is inaccurate on AMD EPYC 7742 64-Core Processor 612 if maxExtendedFunction() >= 0x80000008 { 613 _, _, c, _ := cpuid(0x80000008) 614 if c&0xff > 0 { 615 return int(c&0xff) + 1 616 } 617 } 618 } 619 return 0 620} 621 622// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 623var vendorMapping = map[string]Vendor{ 624 "AMDisbetter!": AMD, 625 "AuthenticAMD": AMD, 626 "CentaurHauls": VIA, 627 "GenuineIntel": Intel, 628 "TransmetaCPU": Transmeta, 629 "GenuineTMx86": Transmeta, 630 "Geode by NSC": NSC, 631 "VIA VIA VIA ": VIA, 632 "KVMKVMKVMKVM": KVM, 633 "Microsoft Hv": MSVM, 634 "VMwareVMware": VMware, 635 "XenVMMXenVMM": XenHVM, 636 "bhyve bhyve ": Bhyve, 637 "HygonGenuine": Hygon, 638 "Vortex86 SoC": SiS, 639 "SiS SiS SiS ": SiS, 640 "RiseRiseRise": SiS, 641 "Genuine RDC": RDC, 642} 643 644func vendorID() (Vendor, string) { 645 _, b, c, d := cpuid(0) 646 v := string(valAsString(b, d, c)) 647 vend, ok := vendorMapping[v] 648 if !ok { 649 return VendorUnknown, v 650 } 651 return vend, v 652} 653 654func 
cacheLine() int { 655 if maxFunctionID() < 0x1 { 656 return 0 657 } 658 659 _, ebx, _, _ := cpuid(1) 660 cache := (ebx & 0xff00) >> 5 // cflush size 661 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 662 _, _, ecx, _ := cpuid(0x80000006) 663 cache = ecx & 0xff // cacheline size 664 } 665 // TODO: Read from Cache and TLB Information 666 return int(cache) 667} 668 669func (c *CPUInfo) cacheSize() { 670 c.Cache.L1D = -1 671 c.Cache.L1I = -1 672 c.Cache.L2 = -1 673 c.Cache.L3 = -1 674 vendor, _ := vendorID() 675 switch vendor { 676 case Intel: 677 if maxFunctionID() < 4 { 678 return 679 } 680 for i := uint32(0); ; i++ { 681 eax, ebx, ecx, _ := cpuidex(4, i) 682 cacheType := eax & 15 683 if cacheType == 0 { 684 break 685 } 686 cacheLevel := (eax >> 5) & 7 687 coherency := int(ebx&0xfff) + 1 688 partitions := int((ebx>>12)&0x3ff) + 1 689 associativity := int((ebx>>22)&0x3ff) + 1 690 sets := int(ecx) + 1 691 size := associativity * partitions * coherency * sets 692 switch cacheLevel { 693 case 1: 694 if cacheType == 1 { 695 // 1 = Data Cache 696 c.Cache.L1D = size 697 } else if cacheType == 2 { 698 // 2 = Instruction Cache 699 c.Cache.L1I = size 700 } else { 701 if c.Cache.L1D < 0 { 702 c.Cache.L1I = size 703 } 704 if c.Cache.L1I < 0 { 705 c.Cache.L1I = size 706 } 707 } 708 case 2: 709 c.Cache.L2 = size 710 case 3: 711 c.Cache.L3 = size 712 } 713 } 714 case AMD, Hygon: 715 // Untested. 
716 if maxExtendedFunction() < 0x80000005 { 717 return 718 } 719 _, _, ecx, edx := cpuid(0x80000005) 720 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 721 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 722 723 if maxExtendedFunction() < 0x80000006 { 724 return 725 } 726 _, _, ecx, _ = cpuid(0x80000006) 727 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 728 729 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties 730 if maxExtendedFunction() < 0x8000001D { 731 return 732 } 733 for i := uint32(0); i < math.MaxUint32; i++ { 734 eax, ebx, ecx, _ := cpuidex(0x8000001D, i) 735 736 level := (eax >> 5) & 7 737 cacheNumSets := ecx + 1 738 cacheLineSize := 1 + (ebx & 2047) 739 cachePhysPartitions := 1 + ((ebx >> 12) & 511) 740 cacheNumWays := 1 + ((ebx >> 22) & 511) 741 742 typ := eax & 15 743 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) 744 if typ == 0 { 745 return 746 } 747 748 switch level { 749 case 1: 750 switch typ { 751 case 1: 752 // Data cache 753 c.Cache.L1D = size 754 case 2: 755 // Inst cache 756 c.Cache.L1I = size 757 default: 758 if c.Cache.L1D < 0 { 759 c.Cache.L1I = size 760 } 761 if c.Cache.L1I < 0 { 762 c.Cache.L1I = size 763 } 764 } 765 case 2: 766 c.Cache.L2 = size 767 case 3: 768 c.Cache.L3 = size 769 } 770 } 771 } 772 773 return 774} 775 776type SGXEPCSection struct { 777 BaseAddress uint64 778 EPCSize uint64 779} 780 781type SGXSupport struct { 782 Available bool 783 LaunchControl bool 784 SGX1Supported bool 785 SGX2Supported bool 786 MaxEnclaveSizeNot64 int64 787 MaxEnclaveSize64 int64 788 EPCSections []SGXEPCSection 789} 790 791func hasSGX(available, lc bool) (rval SGXSupport) { 792 rval.Available = available 793 794 if !available { 795 return 796 } 797 798 rval.LaunchControl = lc 799 800 a, _, _, d := cpuidex(0x12, 0) 801 rval.SGX1Supported = a&0x01 != 0 802 rval.SGX2Supported = a&0x02 != 0 803 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 804 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 805 
rval.EPCSections = make([]SGXEPCSection, 0) 806 807 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 808 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 809 leafType := eax & 0xf 810 811 if leafType == 0 { 812 // Invalid subleaf, stop iterating 813 break 814 } else if leafType == 1 { 815 // EPC Section subleaf 816 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 817 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 818 819 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} 820 rval.EPCSections = append(rval.EPCSections, section) 821 } 822 } 823 824 return 825} 826 827func support() flagSet { 828 var fs flagSet 829 mfi := maxFunctionID() 830 vend, _ := vendorID() 831 if mfi < 0x1 { 832 return fs 833 } 834 family, model := familyModel() 835 836 _, _, c, d := cpuid(1) 837 fs.setIf((d&(1<<15)) != 0, CMOV) 838 fs.setIf((d&(1<<23)) != 0, MMX) 839 fs.setIf((d&(1<<25)) != 0, MMXEXT) 840 fs.setIf((d&(1<<25)) != 0, SSE) 841 fs.setIf((d&(1<<26)) != 0, SSE2) 842 fs.setIf((c&1) != 0, SSE3) 843 fs.setIf((c&(1<<5)) != 0, VMX) 844 fs.setIf((c&0x00000200) != 0, SSSE3) 845 fs.setIf((c&0x00080000) != 0, SSE4) 846 fs.setIf((c&0x00100000) != 0, SSE42) 847 fs.setIf((c&(1<<25)) != 0, AESNI) 848 fs.setIf((c&(1<<1)) != 0, CLMUL) 849 fs.setIf(c&(1<<23) != 0, POPCNT) 850 fs.setIf(c&(1<<30) != 0, RDRAND) 851 852 // This bit has been reserved by Intel & AMD for use by hypervisors, 853 // and indicates the presence of a hypervisor. 
854 fs.setIf(c&(1<<31) != 0, HYPERVISOR) 855 fs.setIf(c&(1<<29) != 0, F16C) 856 fs.setIf(c&(1<<13) != 0, CX16) 857 858 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 859 fs.setIf(threadsPerCore() > 1, HTT) 860 } 861 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { 862 fs.setIf(threadsPerCore() > 1, HTT) 863 } 864 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits 865 const avxCheck = 1<<26 | 1<<27 | 1<<28 866 if c&avxCheck == avxCheck { 867 // Check for OS support 868 eax, _ := xgetbv(0) 869 if (eax & 0x6) == 0x6 { 870 fs.set(AVX) 871 switch vend { 872 case Intel: 873 // Older than Haswell. 874 fs.setIf(family == 6 && model < 60, AVXSLOW) 875 case AMD: 876 // Older than Zen 2 877 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) 878 } 879 } 880 } 881 // FMA3 can be used with SSE registers, so no OS support is strictly needed. 882 // fma3 and OSXSAVE needed. 883 const fma3Check = 1<<12 | 1<<27 884 fs.setIf(c&fma3Check == fma3Check, FMA3) 885 886 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 
887 if mfi >= 7 { 888 _, ebx, ecx, edx := cpuidex(7, 0) 889 eax1, _, _, _ := cpuidex(7, 1) 890 if fs.inSet(AVX) && (ebx&0x00000020) != 0 { 891 fs.set(AVX2) 892 } 893 // CPUID.(EAX=7, ECX=0).EBX 894 if (ebx & 0x00000008) != 0 { 895 fs.set(BMI1) 896 fs.setIf((ebx&0x00000100) != 0, BMI2) 897 } 898 fs.setIf(ebx&(1<<2) != 0, SGX) 899 fs.setIf(ebx&(1<<4) != 0, HLE) 900 fs.setIf(ebx&(1<<9) != 0, ERMS) 901 fs.setIf(ebx&(1<<11) != 0, RTM) 902 fs.setIf(ebx&(1<<14) != 0, MPX) 903 fs.setIf(ebx&(1<<18) != 0, RDSEED) 904 fs.setIf(ebx&(1<<19) != 0, ADX) 905 fs.setIf(ebx&(1<<29) != 0, SHA) 906 // CPUID.(EAX=7, ECX=0).ECX 907 fs.setIf(ecx&(1<<5) != 0, WAITPKG) 908 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) 909 fs.setIf(ecx&(1<<27) != 0, MOVDIRI) 910 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) 911 fs.setIf(ecx&(1<<29) != 0, ENQCMD) 912 fs.setIf(ecx&(1<<30) != 0, SGXLC) 913 // CPUID.(EAX=7, ECX=0).EDX 914 fs.setIf(edx&(1<<14) != 0, SERIALIZE) 915 fs.setIf(edx&(1<<16) != 0, TSXLDTRK) 916 fs.setIf(edx&(1<<26) != 0, IBPB) 917 fs.setIf(edx&(1<<27) != 0, STIBP) 918 919 // Only detect AVX-512 features if XGETBV is supported 920 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 921 // Check for OS support 922 eax, _ := xgetbv(0) 923 924 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 925 // ZMM16-ZMM31 state are enabled by OS) 926 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
927 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 928 if runtime.GOOS == "darwin" { 929 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() 930 } 931 if hasAVX512 { 932 fs.setIf(ebx&(1<<16) != 0, AVX512F) 933 fs.setIf(ebx&(1<<17) != 0, AVX512DQ) 934 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) 935 fs.setIf(ebx&(1<<26) != 0, AVX512PF) 936 fs.setIf(ebx&(1<<27) != 0, AVX512ER) 937 fs.setIf(ebx&(1<<28) != 0, AVX512CD) 938 fs.setIf(ebx&(1<<30) != 0, AVX512BW) 939 fs.setIf(ebx&(1<<31) != 0, AVX512VL) 940 // ecx 941 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) 942 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) 943 fs.setIf(ecx&(1<<8) != 0, GFNI) 944 fs.setIf(ecx&(1<<9) != 0, VAES) 945 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) 946 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) 947 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) 948 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) 949 // edx 950 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) 951 fs.setIf(edx&(1<<22) != 0, AMXBF16) 952 fs.setIf(edx&(1<<24) != 0, AMXTILE) 953 fs.setIf(edx&(1<<25) != 0, AMXINT8) 954 // eax1 = CPUID.(EAX=7, ECX=1).EAX 955 fs.setIf(eax1&(1<<5) != 0, AVX512BF16) 956 } 957 } 958 } 959 960 if maxExtendedFunction() >= 0x80000001 { 961 _, _, c, d := cpuid(0x80000001) 962 if (c & (1 << 5)) != 0 { 963 fs.set(LZCNT) 964 fs.set(POPCNT) 965 } 966 fs.setIf((c&(1<<10)) != 0, IBS) 967 fs.setIf((d&(1<<31)) != 0, AMD3DNOW) 968 fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) 969 fs.setIf((d&(1<<23)) != 0, MMX) 970 fs.setIf((d&(1<<22)) != 0, MMXEXT) 971 fs.setIf((c&(1<<6)) != 0, SSE4A) 972 fs.setIf(d&(1<<20) != 0, NX) 973 fs.setIf(d&(1<<27) != 0, RDTSCP) 974 975 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 976 * used unless the OS has AVX support. 
*/ 977 if fs.inSet(AVX) { 978 fs.setIf((c&0x00000800) != 0, XOP) 979 fs.setIf((c&0x00010000) != 0, FMA4) 980 } 981 982 } 983 if maxExtendedFunction() >= 0x80000008 { 984 _, b, _, _ := cpuid(0x80000008) 985 fs.setIf((b&(1<<9)) != 0, WBNOINVD) 986 } 987 988 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { 989 eax, _, _, _ := cpuid(0x8000001b) 990 fs.setIf((eax>>0)&1 == 1, IBSFFV) 991 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) 992 fs.setIf((eax>>2)&1 == 1, IBSOPSAM) 993 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) 994 fs.setIf((eax>>4)&1 == 1, IBSOPCNT) 995 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) 996 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) 997 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) 998 } 999 1000 return fs 1001} 1002 1003func valAsString(values ...uint32) []byte { 1004 r := make([]byte, 4*len(values)) 1005 for i, v := range values { 1006 dst := r[i*4:] 1007 dst[0] = byte(v & 0xff) 1008 dst[1] = byte((v >> 8) & 0xff) 1009 dst[2] = byte((v >> 16) & 0xff) 1010 dst[3] = byte((v >> 24) & 0xff) 1011 switch { 1012 case dst[0] == 0: 1013 return r[:i*4] 1014 case dst[1] == 0: 1015 return r[:i*4+1] 1016 case dst[2] == 0: 1017 return r[:i*4+2] 1018 case dst[3] == 0: 1019 return r[:i*4+3] 1020 } 1021 } 1022 return r 1023} 1024