// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import "strings"

// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU and hypervisor vendors.
// Other is the zero value and is what vendorID reports for an unrecognized
// vendor string.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
)

// Feature flags. Every constant is a distinct single bit (1 << iota), so
// several of them can be OR-ed together into one Flags value.
// The order of the entries determines the bit values; do not reorder them.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // Standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions
	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP                   // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)

// flagNames maps every feature bit to the name used when a Flags value is
// converted to strings. Note that SSE4 and SSE42 are printed as "SSE4.1" and
// "SSE4.2" respectively.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions
	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}

// CPUInfo contains information
// about the detected system CPU.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // Software Guard Extensions details, filled in by Detect
	maxFunc   uint32     // Value returned by maxFunctionID() at the last Detect
	maxExFunc uint32     // Value returned by maxExtendedFunction() at the last Detect
}

// Low-level instruction hooks. They are declared here as function variables
// and are not assigned in this file.
// NOTE(review): presumably initCPU installs the platform-specific
// implementations before Detect runs - confirm against the per-architecture files.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data: feature queries such as
// CPU.SSE2() only test the Features value stored here.
var CPU CPUInfo

// init installs the instruction hooks via initCPU and then fills in the
// exported CPU variable once at program start.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
194func Detect() { 195 CPU.maxFunc = maxFunctionID() 196 CPU.maxExFunc = maxExtendedFunction() 197 CPU.BrandName = brandName() 198 CPU.CacheLine = cacheLine() 199 CPU.Family, CPU.Model = familyModel() 200 CPU.Features = support() 201 CPU.SGX = hasSGX(CPU.Features&SGX != 0) 202 CPU.ThreadsPerCore = threadsPerCore() 203 CPU.LogicalCores = logicalCores() 204 CPU.PhysicalCores = physicalCores() 205 CPU.VendorID = vendorID() 206 CPU.cacheSize() 207} 208 209// Generated here: http://play.golang.org/p/BxFH2Gdc0G 210 211// Cmov indicates support of CMOV instructions 212func (c CPUInfo) Cmov() bool { 213 return c.Features&CMOV != 0 214} 215 216// Amd3dnow indicates support of AMD 3DNOW! instructions 217func (c CPUInfo) Amd3dnow() bool { 218 return c.Features&AMD3DNOW != 0 219} 220 221// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions 222func (c CPUInfo) Amd3dnowExt() bool { 223 return c.Features&AMD3DNOWEXT != 0 224} 225 226// MMX indicates support of MMX instructions 227func (c CPUInfo) MMX() bool { 228 return c.Features&MMX != 0 229} 230 231// MMXExt indicates support of MMXEXT instructions 232// (SSE integer functions or AMD MMX ext) 233func (c CPUInfo) MMXExt() bool { 234 return c.Features&MMXEXT != 0 235} 236 237// SSE indicates support of SSE instructions 238func (c CPUInfo) SSE() bool { 239 return c.Features&SSE != 0 240} 241 242// SSE2 indicates support of SSE 2 instructions 243func (c CPUInfo) SSE2() bool { 244 return c.Features&SSE2 != 0 245} 246 247// SSE3 indicates support of SSE 3 instructions 248func (c CPUInfo) SSE3() bool { 249 return c.Features&SSE3 != 0 250} 251 252// SSSE3 indicates support of SSSE 3 instructions 253func (c CPUInfo) SSSE3() bool { 254 return c.Features&SSSE3 != 0 255} 256 257// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 258func (c CPUInfo) SSE4() bool { 259 return c.Features&SSE4 != 0 260} 261 262// SSE42 indicates support of SSE4.2 instructions 263func (c CPUInfo) SSE42() bool { 264 return 
c.Features&SSE42 != 0 265} 266 267// AVX indicates support of AVX instructions 268// and operating system support of AVX instructions 269func (c CPUInfo) AVX() bool { 270 return c.Features&AVX != 0 271} 272 273// AVX2 indicates support of AVX2 instructions 274func (c CPUInfo) AVX2() bool { 275 return c.Features&AVX2 != 0 276} 277 278// FMA3 indicates support of FMA3 instructions 279func (c CPUInfo) FMA3() bool { 280 return c.Features&FMA3 != 0 281} 282 283// FMA4 indicates support of FMA4 instructions 284func (c CPUInfo) FMA4() bool { 285 return c.Features&FMA4 != 0 286} 287 288// XOP indicates support of XOP instructions 289func (c CPUInfo) XOP() bool { 290 return c.Features&XOP != 0 291} 292 293// F16C indicates support of F16C instructions 294func (c CPUInfo) F16C() bool { 295 return c.Features&F16C != 0 296} 297 298// BMI1 indicates support of BMI1 instructions 299func (c CPUInfo) BMI1() bool { 300 return c.Features&BMI1 != 0 301} 302 303// BMI2 indicates support of BMI2 instructions 304func (c CPUInfo) BMI2() bool { 305 return c.Features&BMI2 != 0 306} 307 308// TBM indicates support of TBM instructions 309// (AMD Trailing Bit Manipulation) 310func (c CPUInfo) TBM() bool { 311 return c.Features&TBM != 0 312} 313 314// Lzcnt indicates support of LZCNT instruction 315func (c CPUInfo) Lzcnt() bool { 316 return c.Features&LZCNT != 0 317} 318 319// Popcnt indicates support of POPCNT instruction 320func (c CPUInfo) Popcnt() bool { 321 return c.Features&POPCNT != 0 322} 323 324// HTT indicates the processor has Hyperthreading enabled 325func (c CPUInfo) HTT() bool { 326 return c.Features&HTT != 0 327} 328 329// SSE2Slow indicates that SSE2 may be slow on this processor 330func (c CPUInfo) SSE2Slow() bool { 331 return c.Features&SSE2SLOW != 0 332} 333 334// SSE3Slow indicates that SSE3 may be slow on this processor 335func (c CPUInfo) SSE3Slow() bool { 336 return c.Features&SSE3SLOW != 0 337} 338 339// AesNi indicates support of AES-NI instructions 340// (Advanced 
Encryption Standard New Instructions) 341func (c CPUInfo) AesNi() bool { 342 return c.Features&AESNI != 0 343} 344 345// Clmul indicates support of CLMUL instructions 346// (Carry-less Multiplication) 347func (c CPUInfo) Clmul() bool { 348 return c.Features&CLMUL != 0 349} 350 351// NX indicates support of NX (No-Execute) bit 352func (c CPUInfo) NX() bool { 353 return c.Features&NX != 0 354} 355 356// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 357func (c CPUInfo) SSE4A() bool { 358 return c.Features&SSE4A != 0 359} 360 361// HLE indicates support of Hardware Lock Elision 362func (c CPUInfo) HLE() bool { 363 return c.Features&HLE != 0 364} 365 366// RTM indicates support of Restricted Transactional Memory 367func (c CPUInfo) RTM() bool { 368 return c.Features&RTM != 0 369} 370 371// Rdrand indicates support of RDRAND instruction is available 372func (c CPUInfo) Rdrand() bool { 373 return c.Features&RDRAND != 0 374} 375 376// Rdseed indicates support of RDSEED instruction is available 377func (c CPUInfo) Rdseed() bool { 378 return c.Features&RDSEED != 0 379} 380 381// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 382func (c CPUInfo) ADX() bool { 383 return c.Features&ADX != 0 384} 385 386// SHA indicates support of Intel SHA Extensions 387func (c CPUInfo) SHA() bool { 388 return c.Features&SHA != 0 389} 390 391// AVX512F indicates support of AVX-512 Foundation 392func (c CPUInfo) AVX512F() bool { 393 return c.Features&AVX512F != 0 394} 395 396// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 397func (c CPUInfo) AVX512DQ() bool { 398 return c.Features&AVX512DQ != 0 399} 400 401// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 402func (c CPUInfo) AVX512IFMA() bool { 403 return c.Features&AVX512IFMA != 0 404} 405 406// AVX512PF indicates support of AVX-512 Prefetch Instructions 407func (c CPUInfo) AVX512PF() bool { 408 return 
c.Features&AVX512PF != 0 409} 410 411// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 412func (c CPUInfo) AVX512ER() bool { 413 return c.Features&AVX512ER != 0 414} 415 416// AVX512CD indicates support of AVX-512 Conflict Detection Instructions 417func (c CPUInfo) AVX512CD() bool { 418 return c.Features&AVX512CD != 0 419} 420 421// AVX512BW indicates support of AVX-512 Byte and Word Instructions 422func (c CPUInfo) AVX512BW() bool { 423 return c.Features&AVX512BW != 0 424} 425 426// AVX512VL indicates support of AVX-512 Vector Length Extensions 427func (c CPUInfo) AVX512VL() bool { 428 return c.Features&AVX512VL != 0 429} 430 431// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions 432func (c CPUInfo) AVX512VBMI() bool { 433 return c.Features&AVX512VBMI != 0 434} 435 436// MPX indicates support of Intel MPX (Memory Protection Extensions) 437func (c CPUInfo) MPX() bool { 438 return c.Features&MPX != 0 439} 440 441// ERMS indicates support of Enhanced REP MOVSB/STOSB 442func (c CPUInfo) ERMS() bool { 443 return c.Features&ERMS != 0 444} 445 446// RDTSCP Instruction is available. 447func (c CPUInfo) RDTSCP() bool { 448 return c.Features&RDTSCP != 0 449} 450 451// CX16 indicates if CMPXCHG16B instruction is available. 452func (c CPUInfo) CX16() bool { 453 return c.Features&CX16 != 0 454} 455 456// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. 457// So TSX simply checks that. 
458func (c CPUInfo) TSX() bool { 459 return c.Features&(HLE|RTM) == HLE|RTM 460} 461 462// Atom indicates an Atom processor 463func (c CPUInfo) Atom() bool { 464 return c.Features&ATOM != 0 465} 466 467// Intel returns true if vendor is recognized as Intel 468func (c CPUInfo) Intel() bool { 469 return c.VendorID == Intel 470} 471 472// AMD returns true if vendor is recognized as AMD 473func (c CPUInfo) AMD() bool { 474 return c.VendorID == AMD 475} 476 477// Hygon returns true if vendor is recognized as Hygon 478func (c CPUInfo) Hygon() bool { 479 return c.VendorID == Hygon 480} 481 482// Transmeta returns true if vendor is recognized as Transmeta 483func (c CPUInfo) Transmeta() bool { 484 return c.VendorID == Transmeta 485} 486 487// NSC returns true if vendor is recognized as National Semiconductor 488func (c CPUInfo) NSC() bool { 489 return c.VendorID == NSC 490} 491 492// VIA returns true if vendor is recognized as VIA 493func (c CPUInfo) VIA() bool { 494 return c.VendorID == VIA 495} 496 497// RTCounter returns the 64-bit time-stamp counter 498// Uses the RDTSCP instruction. The value 0 is returned 499// if the CPU does not support the instruction. 500func (c CPUInfo) RTCounter() uint64 { 501 if !c.RDTSCP() { 502 return 0 503 } 504 a, _, _, d := rdtscpAsm() 505 return uint64(a) | (uint64(d) << 32) 506} 507 508// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 509// This variable is OS dependent, but on Linux contains information 510// about the current cpu/core the code is running on. 511// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 512func (c CPUInfo) Ia32TscAux() uint32 { 513 if !c.RDTSCP() { 514 return 0 515 } 516 _, _, ecx, _ := rdtscpAsm() 517 return ecx 518} 519 520// LogicalCPU will return the Logical CPU the code is currently executing on. 521// This is likely to change when the OS re-schedules the running thread 522// to another CPU. 523// If the current core cannot be detected, -1 will be returned. 
524func (c CPUInfo) LogicalCPU() int { 525 if c.maxFunc < 1 { 526 return -1 527 } 528 _, ebx, _, _ := cpuid(1) 529 return int(ebx >> 24) 530} 531 532// VM Will return true if the cpu id indicates we are in 533// a virtual machine. This is only a hint, and will very likely 534// have many false negatives. 535func (c CPUInfo) VM() bool { 536 switch c.VendorID { 537 case MSVM, KVM, VMware, XenHVM, Bhyve: 538 return true 539 } 540 return false 541} 542 543// Flags contains detected cpu features and caracteristics 544type Flags uint64 545 546// String returns a string representation of the detected 547// CPU features. 548func (f Flags) String() string { 549 return strings.Join(f.Strings(), ",") 550} 551 552// Strings returns and array of the detected features. 553func (f Flags) Strings() []string { 554 s := support() 555 r := make([]string, 0, 20) 556 for i := uint(0); i < 64; i++ { 557 key := Flags(1 << i) 558 val := flagNames[key] 559 if s&key != 0 { 560 r = append(r, val) 561 } 562 } 563 return r 564} 565 566func maxExtendedFunction() uint32 { 567 eax, _, _, _ := cpuid(0x80000000) 568 return eax 569} 570 571func maxFunctionID() uint32 { 572 a, _, _, _ := cpuid(0) 573 return a 574} 575 576func brandName() string { 577 if maxExtendedFunction() >= 0x80000004 { 578 v := make([]uint32, 0, 48) 579 for i := uint32(0); i < 3; i++ { 580 a, b, c, d := cpuid(0x80000002 + i) 581 v = append(v, a, b, c, d) 582 } 583 return strings.Trim(string(valAsString(v...)), " ") 584 } 585 return "unknown" 586} 587 588func threadsPerCore() int { 589 mfi := maxFunctionID() 590 if mfi < 0x4 || vendorID() != Intel { 591 return 1 592 } 593 594 if mfi < 0xb { 595 _, b, _, d := cpuid(1) 596 if (d & (1 << 28)) != 0 { 597 // v will contain logical core count 598 v := (b >> 16) & 255 599 if v > 1 { 600 a4, _, _, _ := cpuid(4) 601 // physical cores 602 v2 := (a4 >> 26) + 1 603 if v2 > 0 { 604 return int(v) / int(v2) 605 } 606 } 607 } 608 return 1 609 } 610 _, b, _, _ := cpuidex(0xb, 0) 611 if b&0xffff 
== 0 { 612 return 1 613 } 614 return int(b & 0xffff) 615} 616 617func logicalCores() int { 618 mfi := maxFunctionID() 619 switch vendorID() { 620 case Intel: 621 // Use this on old Intel processors 622 if mfi < 0xb { 623 if mfi < 1 { 624 return 0 625 } 626 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 627 // that can be assigned to logical processors in a physical package. 628 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 629 _, ebx, _, _ := cpuid(1) 630 logical := (ebx >> 16) & 0xff 631 return int(logical) 632 } 633 _, b, _, _ := cpuidex(0xb, 1) 634 return int(b & 0xffff) 635 case AMD, Hygon: 636 _, b, _, _ := cpuid(1) 637 return int((b >> 16) & 0xff) 638 default: 639 return 0 640 } 641} 642 643func familyModel() (int, int) { 644 if maxFunctionID() < 0x1 { 645 return 0, 0 646 } 647 eax, _, _, _ := cpuid(1) 648 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 649 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 650 return int(family), int(model) 651} 652 653func physicalCores() int { 654 switch vendorID() { 655 case Intel: 656 return logicalCores() / threadsPerCore() 657 case AMD, Hygon: 658 if maxExtendedFunction() >= 0x80000008 { 659 _, _, c, _ := cpuid(0x80000008) 660 return int(c&0xff) + 1 661 } 662 } 663 return 0 664} 665 666// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 667var vendorMapping = map[string]Vendor{ 668 "AMDisbetter!": AMD, 669 "AuthenticAMD": AMD, 670 "CentaurHauls": VIA, 671 "GenuineIntel": Intel, 672 "TransmetaCPU": Transmeta, 673 "GenuineTMx86": Transmeta, 674 "Geode by NSC": NSC, 675 "VIA VIA VIA ": VIA, 676 "KVMKVMKVMKVM": KVM, 677 "Microsoft Hv": MSVM, 678 "VMwareVMware": VMware, 679 "XenVMMXenVMM": XenHVM, 680 "bhyve bhyve ": Bhyve, 681 "HygonGenuine": Hygon, 682} 683 684func vendorID() Vendor { 685 _, b, c, d := cpuid(0) 686 v := valAsString(b, d, c) 687 vend, ok := 
vendorMapping[string(v)] 688 if !ok { 689 return Other 690 } 691 return vend 692} 693 694func cacheLine() int { 695 if maxFunctionID() < 0x1 { 696 return 0 697 } 698 699 _, ebx, _, _ := cpuid(1) 700 cache := (ebx & 0xff00) >> 5 // cflush size 701 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 702 _, _, ecx, _ := cpuid(0x80000006) 703 cache = ecx & 0xff // cacheline size 704 } 705 // TODO: Read from Cache and TLB Information 706 return int(cache) 707} 708 709func (c *CPUInfo) cacheSize() { 710 c.Cache.L1D = -1 711 c.Cache.L1I = -1 712 c.Cache.L2 = -1 713 c.Cache.L3 = -1 714 vendor := vendorID() 715 switch vendor { 716 case Intel: 717 if maxFunctionID() < 4 { 718 return 719 } 720 for i := uint32(0); ; i++ { 721 eax, ebx, ecx, _ := cpuidex(4, i) 722 cacheType := eax & 15 723 if cacheType == 0 { 724 break 725 } 726 cacheLevel := (eax >> 5) & 7 727 coherency := int(ebx&0xfff) + 1 728 partitions := int((ebx>>12)&0x3ff) + 1 729 associativity := int((ebx>>22)&0x3ff) + 1 730 sets := int(ecx) + 1 731 size := associativity * partitions * coherency * sets 732 switch cacheLevel { 733 case 1: 734 if cacheType == 1 { 735 // 1 = Data Cache 736 c.Cache.L1D = size 737 } else if cacheType == 2 { 738 // 2 = Instruction Cache 739 c.Cache.L1I = size 740 } else { 741 if c.Cache.L1D < 0 { 742 c.Cache.L1I = size 743 } 744 if c.Cache.L1I < 0 { 745 c.Cache.L1I = size 746 } 747 } 748 case 2: 749 c.Cache.L2 = size 750 case 3: 751 c.Cache.L3 = size 752 } 753 } 754 case AMD, Hygon: 755 // Untested. 
756 if maxExtendedFunction() < 0x80000005 { 757 return 758 } 759 _, _, ecx, edx := cpuid(0x80000005) 760 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 761 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 762 763 if maxExtendedFunction() < 0x80000006 { 764 return 765 } 766 _, _, ecx, _ = cpuid(0x80000006) 767 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 768 } 769 770 return 771} 772 773type SGXSupport struct { 774 Available bool 775 SGX1Supported bool 776 SGX2Supported bool 777 MaxEnclaveSizeNot64 int64 778 MaxEnclaveSize64 int64 779} 780 781func hasSGX(available bool) (rval SGXSupport) { 782 rval.Available = available 783 784 if !available { 785 return 786 } 787 788 a, _, _, d := cpuidex(0x12, 0) 789 rval.SGX1Supported = a&0x01 != 0 790 rval.SGX2Supported = a&0x02 != 0 791 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 792 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 793 794 return 795} 796 797func support() Flags { 798 mfi := maxFunctionID() 799 vend := vendorID() 800 if mfi < 0x1 { 801 return 0 802 } 803 rval := uint64(0) 804 _, _, c, d := cpuid(1) 805 if (d & (1 << 15)) != 0 { 806 rval |= CMOV 807 } 808 if (d & (1 << 23)) != 0 { 809 rval |= MMX 810 } 811 if (d & (1 << 25)) != 0 { 812 rval |= MMXEXT 813 } 814 if (d & (1 << 25)) != 0 { 815 rval |= SSE 816 } 817 if (d & (1 << 26)) != 0 { 818 rval |= SSE2 819 } 820 if (c & 1) != 0 { 821 rval |= SSE3 822 } 823 if (c & 0x00000200) != 0 { 824 rval |= SSSE3 825 } 826 if (c & 0x00080000) != 0 { 827 rval |= SSE4 828 } 829 if (c & 0x00100000) != 0 { 830 rval |= SSE42 831 } 832 if (c & (1 << 25)) != 0 { 833 rval |= AESNI 834 } 835 if (c & (1 << 1)) != 0 { 836 rval |= CLMUL 837 } 838 if c&(1<<23) != 0 { 839 rval |= POPCNT 840 } 841 if c&(1<<30) != 0 { 842 rval |= RDRAND 843 } 844 if c&(1<<29) != 0 { 845 rval |= F16C 846 } 847 if c&(1<<13) != 0 { 848 rval |= CX16 849 } 850 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 851 if threadsPerCore() > 1 { 852 rval |= HTT 853 } 854 } 855 856 // Check XGETBV, 
OXSAVE and AVX bits 857 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 858 // Check for OS support 859 eax, _ := xgetbv(0) 860 if (eax & 0x6) == 0x6 { 861 rval |= AVX 862 if (c & 0x00001000) != 0 { 863 rval |= FMA3 864 } 865 } 866 } 867 868 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 869 if mfi >= 7 { 870 _, ebx, ecx, edx := cpuidex(7, 0) 871 if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { 872 rval |= AVX2 873 } 874 if (ebx & 0x00000008) != 0 { 875 rval |= BMI1 876 if (ebx & 0x00000100) != 0 { 877 rval |= BMI2 878 } 879 } 880 if ebx&(1<<2) != 0 { 881 rval |= SGX 882 } 883 if ebx&(1<<4) != 0 { 884 rval |= HLE 885 } 886 if ebx&(1<<9) != 0 { 887 rval |= ERMS 888 } 889 if ebx&(1<<11) != 0 { 890 rval |= RTM 891 } 892 if ebx&(1<<14) != 0 { 893 rval |= MPX 894 } 895 if ebx&(1<<18) != 0 { 896 rval |= RDSEED 897 } 898 if ebx&(1<<19) != 0 { 899 rval |= ADX 900 } 901 if ebx&(1<<29) != 0 { 902 rval |= SHA 903 } 904 if edx&(1<<26) != 0 { 905 rval |= IBPB 906 } 907 if edx&(1<<27) != 0 { 908 rval |= STIBP 909 } 910 911 // Only detect AVX-512 features if XGETBV is supported 912 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 913 // Check for OS support 914 eax, _ := xgetbv(0) 915 916 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 917 // ZMM16-ZMM31 state are enabled by OS) 918 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
919 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 920 if ebx&(1<<16) != 0 { 921 rval |= AVX512F 922 } 923 if ebx&(1<<17) != 0 { 924 rval |= AVX512DQ 925 } 926 if ebx&(1<<21) != 0 { 927 rval |= AVX512IFMA 928 } 929 if ebx&(1<<26) != 0 { 930 rval |= AVX512PF 931 } 932 if ebx&(1<<27) != 0 { 933 rval |= AVX512ER 934 } 935 if ebx&(1<<28) != 0 { 936 rval |= AVX512CD 937 } 938 if ebx&(1<<30) != 0 { 939 rval |= AVX512BW 940 } 941 if ebx&(1<<31) != 0 { 942 rval |= AVX512VL 943 } 944 // ecx 945 if ecx&(1<<1) != 0 { 946 rval |= AVX512VBMI 947 } 948 } 949 } 950 } 951 952 if maxExtendedFunction() >= 0x80000001 { 953 _, _, c, d := cpuid(0x80000001) 954 if (c & (1 << 5)) != 0 { 955 rval |= LZCNT 956 rval |= POPCNT 957 } 958 if (d & (1 << 31)) != 0 { 959 rval |= AMD3DNOW 960 } 961 if (d & (1 << 30)) != 0 { 962 rval |= AMD3DNOWEXT 963 } 964 if (d & (1 << 23)) != 0 { 965 rval |= MMX 966 } 967 if (d & (1 << 22)) != 0 { 968 rval |= MMXEXT 969 } 970 if (c & (1 << 6)) != 0 { 971 rval |= SSE4A 972 } 973 if d&(1<<20) != 0 { 974 rval |= NX 975 } 976 if d&(1<<27) != 0 { 977 rval |= RDTSCP 978 } 979 980 /* Allow for selectively disabling SSE2 functions on AMD processors 981 with SSE2 support but not SSE4a. This includes Athlon64, some 982 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 983 than SSE2 often enough to utilize this special-case flag. 984 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 985 so that SSE2 is used unless explicitly disabled by checking 986 AV_CPU_FLAG_SSE2SLOW. */ 987 if vendorID() != Intel && 988 rval&SSE2 != 0 && (c&0x00000040) == 0 { 989 rval |= SSE2SLOW 990 } 991 992 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 993 * used unless the OS has AVX support. 
*/ 994 if (rval & AVX) != 0 { 995 if (c & 0x00000800) != 0 { 996 rval |= XOP 997 } 998 if (c & 0x00010000) != 0 { 999 rval |= FMA4 1000 } 1001 } 1002 1003 if vendorID() == Intel { 1004 family, model := familyModel() 1005 if family == 6 && (model == 9 || model == 13 || model == 14) { 1006 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 1007 * 6/14 (core1 "yonah") theoretically support sse2, but it's 1008 * usually slower than mmx. */ 1009 if (rval & SSE2) != 0 { 1010 rval |= SSE2SLOW 1011 } 1012 if (rval & SSE3) != 0 { 1013 rval |= SSE3SLOW 1014 } 1015 } 1016 /* The Atom processor has SSSE3 support, which is useful in many cases, 1017 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 1018 * on the Atom, but is generally faster on other processors supporting 1019 * SSSE3. This flag allows for selectively disabling certain SSSE3 1020 * functions on the Atom. */ 1021 if family == 6 && model == 28 { 1022 rval |= ATOM 1023 } 1024 } 1025 } 1026 return Flags(rval) 1027} 1028 1029func valAsString(values ...uint32) []byte { 1030 r := make([]byte, 4*len(values)) 1031 for i, v := range values { 1032 dst := r[i*4:] 1033 dst[0] = byte(v & 0xff) 1034 dst[1] = byte((v >> 8) & 0xff) 1035 dst[2] = byte((v >> 16) & 0xff) 1036 dst[3] = byte((v >> 24) & 0xff) 1037 switch { 1038 case dst[0] == 0: 1039 return r[:i*4] 1040 case dst[1] == 0: 1041 return r[:i*4+1] 1042 case dst[2] == 0: 1043 return r[:i*4+2] 1044 case dst[3] == 0: 1045 return r[:i*4+3] 1046 } 1047 } 1048 return r 1049} 1050