// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import "strings"

// Vendor is a representation of a CPU vendor.
type Vendor int

// Vendors that can be recognized from the CPUID vendor string.
// Other is the zero value and is used for every unrecognized vendor string.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
)

// CPU feature flags.
//
// Every constant is a distinct single bit (1 << iota), so flags can be
// combined into, and tested against, a Flags bit set. The constants are
// deliberately untyped; their order defines their bit position, so new flags
// must only ever be appended within their section.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions
	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP                   // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)

// flagNames maps every feature flag to the name used in the textual
// representation of a Flags value. Keep it in sync with the constants above.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions
	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}

// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // Details about Software Guard Extensions support
	maxFunc   uint32     // Highest standard CPUID function, as read by Detect
	maxExFunc uint32     // Highest extended CPUID function, as read by Detect
}

// Low-level hooks used to query the processor. They are nil function values
// here; init calls initCPU before Detect, so presumably initCPU (defined
// outside this file) installs the platform implementations — confirm there.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo

// init prepares the low-level hooks and then populates CPU, so the exported
// variable is ready before any user code runs.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
192func Detect() { 193 CPU.maxFunc = maxFunctionID() 194 CPU.maxExFunc = maxExtendedFunction() 195 CPU.BrandName = brandName() 196 CPU.CacheLine = cacheLine() 197 CPU.Family, CPU.Model = familyModel() 198 CPU.Features = support() 199 CPU.SGX = hasSGX(CPU.Features&SGX != 0) 200 CPU.ThreadsPerCore = threadsPerCore() 201 CPU.LogicalCores = logicalCores() 202 CPU.PhysicalCores = physicalCores() 203 CPU.VendorID = vendorID() 204 CPU.cacheSize() 205} 206 207// Generated here: http://play.golang.org/p/BxFH2Gdc0G 208 209// Cmov indicates support of CMOV instructions 210func (c CPUInfo) Cmov() bool { 211 return c.Features&CMOV != 0 212} 213 214// Amd3dnow indicates support of AMD 3DNOW! instructions 215func (c CPUInfo) Amd3dnow() bool { 216 return c.Features&AMD3DNOW != 0 217} 218 219// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions 220func (c CPUInfo) Amd3dnowExt() bool { 221 return c.Features&AMD3DNOWEXT != 0 222} 223 224// MMX indicates support of MMX instructions 225func (c CPUInfo) MMX() bool { 226 return c.Features&MMX != 0 227} 228 229// MMXExt indicates support of MMXEXT instructions 230// (SSE integer functions or AMD MMX ext) 231func (c CPUInfo) MMXExt() bool { 232 return c.Features&MMXEXT != 0 233} 234 235// SSE indicates support of SSE instructions 236func (c CPUInfo) SSE() bool { 237 return c.Features&SSE != 0 238} 239 240// SSE2 indicates support of SSE 2 instructions 241func (c CPUInfo) SSE2() bool { 242 return c.Features&SSE2 != 0 243} 244 245// SSE3 indicates support of SSE 3 instructions 246func (c CPUInfo) SSE3() bool { 247 return c.Features&SSE3 != 0 248} 249 250// SSSE3 indicates support of SSSE 3 instructions 251func (c CPUInfo) SSSE3() bool { 252 return c.Features&SSSE3 != 0 253} 254 255// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 256func (c CPUInfo) SSE4() bool { 257 return c.Features&SSE4 != 0 258} 259 260// SSE42 indicates support of SSE4.2 instructions 261func (c CPUInfo) SSE42() bool { 262 return 
c.Features&SSE42 != 0 263} 264 265// AVX indicates support of AVX instructions 266// and operating system support of AVX instructions 267func (c CPUInfo) AVX() bool { 268 return c.Features&AVX != 0 269} 270 271// AVX2 indicates support of AVX2 instructions 272func (c CPUInfo) AVX2() bool { 273 return c.Features&AVX2 != 0 274} 275 276// FMA3 indicates support of FMA3 instructions 277func (c CPUInfo) FMA3() bool { 278 return c.Features&FMA3 != 0 279} 280 281// FMA4 indicates support of FMA4 instructions 282func (c CPUInfo) FMA4() bool { 283 return c.Features&FMA4 != 0 284} 285 286// XOP indicates support of XOP instructions 287func (c CPUInfo) XOP() bool { 288 return c.Features&XOP != 0 289} 290 291// F16C indicates support of F16C instructions 292func (c CPUInfo) F16C() bool { 293 return c.Features&F16C != 0 294} 295 296// BMI1 indicates support of BMI1 instructions 297func (c CPUInfo) BMI1() bool { 298 return c.Features&BMI1 != 0 299} 300 301// BMI2 indicates support of BMI2 instructions 302func (c CPUInfo) BMI2() bool { 303 return c.Features&BMI2 != 0 304} 305 306// TBM indicates support of TBM instructions 307// (AMD Trailing Bit Manipulation) 308func (c CPUInfo) TBM() bool { 309 return c.Features&TBM != 0 310} 311 312// Lzcnt indicates support of LZCNT instruction 313func (c CPUInfo) Lzcnt() bool { 314 return c.Features&LZCNT != 0 315} 316 317// Popcnt indicates support of POPCNT instruction 318func (c CPUInfo) Popcnt() bool { 319 return c.Features&POPCNT != 0 320} 321 322// HTT indicates the processor has Hyperthreading enabled 323func (c CPUInfo) HTT() bool { 324 return c.Features&HTT != 0 325} 326 327// SSE2Slow indicates that SSE2 may be slow on this processor 328func (c CPUInfo) SSE2Slow() bool { 329 return c.Features&SSE2SLOW != 0 330} 331 332// SSE3Slow indicates that SSE3 may be slow on this processor 333func (c CPUInfo) SSE3Slow() bool { 334 return c.Features&SSE3SLOW != 0 335} 336 337// AesNi indicates support of AES-NI instructions 338// (Advanced 
Encryption Standard New Instructions) 339func (c CPUInfo) AesNi() bool { 340 return c.Features&AESNI != 0 341} 342 343// Clmul indicates support of CLMUL instructions 344// (Carry-less Multiplication) 345func (c CPUInfo) Clmul() bool { 346 return c.Features&CLMUL != 0 347} 348 349// NX indicates support of NX (No-Execute) bit 350func (c CPUInfo) NX() bool { 351 return c.Features&NX != 0 352} 353 354// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 355func (c CPUInfo) SSE4A() bool { 356 return c.Features&SSE4A != 0 357} 358 359// HLE indicates support of Hardware Lock Elision 360func (c CPUInfo) HLE() bool { 361 return c.Features&HLE != 0 362} 363 364// RTM indicates support of Restricted Transactional Memory 365func (c CPUInfo) RTM() bool { 366 return c.Features&RTM != 0 367} 368 369// Rdrand indicates support of RDRAND instruction is available 370func (c CPUInfo) Rdrand() bool { 371 return c.Features&RDRAND != 0 372} 373 374// Rdseed indicates support of RDSEED instruction is available 375func (c CPUInfo) Rdseed() bool { 376 return c.Features&RDSEED != 0 377} 378 379// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 380func (c CPUInfo) ADX() bool { 381 return c.Features&ADX != 0 382} 383 384// SHA indicates support of Intel SHA Extensions 385func (c CPUInfo) SHA() bool { 386 return c.Features&SHA != 0 387} 388 389// AVX512F indicates support of AVX-512 Foundation 390func (c CPUInfo) AVX512F() bool { 391 return c.Features&AVX512F != 0 392} 393 394// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 395func (c CPUInfo) AVX512DQ() bool { 396 return c.Features&AVX512DQ != 0 397} 398 399// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 400func (c CPUInfo) AVX512IFMA() bool { 401 return c.Features&AVX512IFMA != 0 402} 403 404// AVX512PF indicates support of AVX-512 Prefetch Instructions 405func (c CPUInfo) AVX512PF() bool { 406 return 
c.Features&AVX512PF != 0 407} 408 409// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 410func (c CPUInfo) AVX512ER() bool { 411 return c.Features&AVX512ER != 0 412} 413 414// AVX512CD indicates support of AVX-512 Conflict Detection Instructions 415func (c CPUInfo) AVX512CD() bool { 416 return c.Features&AVX512CD != 0 417} 418 419// AVX512BW indicates support of AVX-512 Byte and Word Instructions 420func (c CPUInfo) AVX512BW() bool { 421 return c.Features&AVX512BW != 0 422} 423 424// AVX512VL indicates support of AVX-512 Vector Length Extensions 425func (c CPUInfo) AVX512VL() bool { 426 return c.Features&AVX512VL != 0 427} 428 429// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions 430func (c CPUInfo) AVX512VBMI() bool { 431 return c.Features&AVX512VBMI != 0 432} 433 434// MPX indicates support of Intel MPX (Memory Protection Extensions) 435func (c CPUInfo) MPX() bool { 436 return c.Features&MPX != 0 437} 438 439// ERMS indicates support of Enhanced REP MOVSB/STOSB 440func (c CPUInfo) ERMS() bool { 441 return c.Features&ERMS != 0 442} 443 444// RDTSCP Instruction is available. 445func (c CPUInfo) RDTSCP() bool { 446 return c.Features&RDTSCP != 0 447} 448 449// CX16 indicates if CMPXCHG16B instruction is available. 450func (c CPUInfo) CX16() bool { 451 return c.Features&CX16 != 0 452} 453 454// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. 455// So TSX simply checks that. 
456func (c CPUInfo) TSX() bool { 457 return c.Features&(HLE|RTM) == HLE|RTM 458} 459 460// Atom indicates an Atom processor 461func (c CPUInfo) Atom() bool { 462 return c.Features&ATOM != 0 463} 464 465// Intel returns true if vendor is recognized as Intel 466func (c CPUInfo) Intel() bool { 467 return c.VendorID == Intel 468} 469 470// AMD returns true if vendor is recognized as AMD 471func (c CPUInfo) AMD() bool { 472 return c.VendorID == AMD 473} 474 475// Transmeta returns true if vendor is recognized as Transmeta 476func (c CPUInfo) Transmeta() bool { 477 return c.VendorID == Transmeta 478} 479 480// NSC returns true if vendor is recognized as National Semiconductor 481func (c CPUInfo) NSC() bool { 482 return c.VendorID == NSC 483} 484 485// VIA returns true if vendor is recognized as VIA 486func (c CPUInfo) VIA() bool { 487 return c.VendorID == VIA 488} 489 490// RTCounter returns the 64-bit time-stamp counter 491// Uses the RDTSCP instruction. The value 0 is returned 492// if the CPU does not support the instruction. 493func (c CPUInfo) RTCounter() uint64 { 494 if !c.RDTSCP() { 495 return 0 496 } 497 a, _, _, d := rdtscpAsm() 498 return uint64(a) | (uint64(d) << 32) 499} 500 501// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 502// This variable is OS dependent, but on Linux contains information 503// about the current cpu/core the code is running on. 504// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 505func (c CPUInfo) Ia32TscAux() uint32 { 506 if !c.RDTSCP() { 507 return 0 508 } 509 _, _, ecx, _ := rdtscpAsm() 510 return ecx 511} 512 513// LogicalCPU will return the Logical CPU the code is currently executing on. 514// This is likely to change when the OS re-schedules the running thread 515// to another CPU. 516// If the current core cannot be detected, -1 will be returned. 
517func (c CPUInfo) LogicalCPU() int { 518 if c.maxFunc < 1 { 519 return -1 520 } 521 _, ebx, _, _ := cpuid(1) 522 return int(ebx >> 24) 523} 524 525// VM Will return true if the cpu id indicates we are in 526// a virtual machine. This is only a hint, and will very likely 527// have many false negatives. 528func (c CPUInfo) VM() bool { 529 switch c.VendorID { 530 case MSVM, KVM, VMware, XenHVM: 531 return true 532 } 533 return false 534} 535 536// Flags contains detected cpu features and caracteristics 537type Flags uint64 538 539// String returns a string representation of the detected 540// CPU features. 541func (f Flags) String() string { 542 return strings.Join(f.Strings(), ",") 543} 544 545// Strings returns and array of the detected features. 546func (f Flags) Strings() []string { 547 s := support() 548 r := make([]string, 0, 20) 549 for i := uint(0); i < 64; i++ { 550 key := Flags(1 << i) 551 val := flagNames[key] 552 if s&key != 0 { 553 r = append(r, val) 554 } 555 } 556 return r 557} 558 559func maxExtendedFunction() uint32 { 560 eax, _, _, _ := cpuid(0x80000000) 561 return eax 562} 563 564func maxFunctionID() uint32 { 565 a, _, _, _ := cpuid(0) 566 return a 567} 568 569func brandName() string { 570 if maxExtendedFunction() >= 0x80000004 { 571 v := make([]uint32, 0, 48) 572 for i := uint32(0); i < 3; i++ { 573 a, b, c, d := cpuid(0x80000002 + i) 574 v = append(v, a, b, c, d) 575 } 576 return strings.Trim(string(valAsString(v...)), " ") 577 } 578 return "unknown" 579} 580 581func threadsPerCore() int { 582 mfi := maxFunctionID() 583 if mfi < 0x4 || vendorID() != Intel { 584 return 1 585 } 586 587 if mfi < 0xb { 588 _, b, _, d := cpuid(1) 589 if (d & (1 << 28)) != 0 { 590 // v will contain logical core count 591 v := (b >> 16) & 255 592 if v > 1 { 593 a4, _, _, _ := cpuid(4) 594 // physical cores 595 v2 := (a4 >> 26) + 1 596 if v2 > 0 { 597 return int(v) / int(v2) 598 } 599 } 600 } 601 return 1 602 } 603 _, b, _, _ := cpuidex(0xb, 0) 604 if b&0xffff == 0 { 
605 return 1 606 } 607 return int(b & 0xffff) 608} 609 610func logicalCores() int { 611 mfi := maxFunctionID() 612 switch vendorID() { 613 case Intel: 614 // Use this on old Intel processors 615 if mfi < 0xb { 616 if mfi < 1 { 617 return 0 618 } 619 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 620 // that can be assigned to logical processors in a physical package. 621 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 622 _, ebx, _, _ := cpuid(1) 623 logical := (ebx >> 16) & 0xff 624 return int(logical) 625 } 626 _, b, _, _ := cpuidex(0xb, 1) 627 return int(b & 0xffff) 628 case AMD: 629 _, b, _, _ := cpuid(1) 630 return int((b >> 16) & 0xff) 631 default: 632 return 0 633 } 634} 635 636func familyModel() (int, int) { 637 if maxFunctionID() < 0x1 { 638 return 0, 0 639 } 640 eax, _, _, _ := cpuid(1) 641 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 642 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 643 return int(family), int(model) 644} 645 646func physicalCores() int { 647 switch vendorID() { 648 case Intel: 649 return logicalCores() / threadsPerCore() 650 case AMD: 651 if maxExtendedFunction() >= 0x80000008 { 652 _, _, c, _ := cpuid(0x80000008) 653 return int(c&0xff) + 1 654 } 655 } 656 return 0 657} 658 659// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 660var vendorMapping = map[string]Vendor{ 661 "AMDisbetter!": AMD, 662 "AuthenticAMD": AMD, 663 "CentaurHauls": VIA, 664 "GenuineIntel": Intel, 665 "TransmetaCPU": Transmeta, 666 "GenuineTMx86": Transmeta, 667 "Geode by NSC": NSC, 668 "VIA VIA VIA ": VIA, 669 "KVMKVMKVMKVM": KVM, 670 "Microsoft Hv": MSVM, 671 "VMwareVMware": VMware, 672 "XenVMMXenVMM": XenHVM, 673} 674 675func vendorID() Vendor { 676 _, b, c, d := cpuid(0) 677 v := valAsString(b, d, c) 678 vend, ok := vendorMapping[string(v)] 679 if !ok { 680 return Other 681 } 682 return vend 683} 684 685func 
cacheLine() int { 686 if maxFunctionID() < 0x1 { 687 return 0 688 } 689 690 _, ebx, _, _ := cpuid(1) 691 cache := (ebx & 0xff00) >> 5 // cflush size 692 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 693 _, _, ecx, _ := cpuid(0x80000006) 694 cache = ecx & 0xff // cacheline size 695 } 696 // TODO: Read from Cache and TLB Information 697 return int(cache) 698} 699 700func (c *CPUInfo) cacheSize() { 701 c.Cache.L1D = -1 702 c.Cache.L1I = -1 703 c.Cache.L2 = -1 704 c.Cache.L3 = -1 705 vendor := vendorID() 706 switch vendor { 707 case Intel: 708 if maxFunctionID() < 4 { 709 return 710 } 711 for i := uint32(0); ; i++ { 712 eax, ebx, ecx, _ := cpuidex(4, i) 713 cacheType := eax & 15 714 if cacheType == 0 { 715 break 716 } 717 cacheLevel := (eax >> 5) & 7 718 coherency := int(ebx&0xfff) + 1 719 partitions := int((ebx>>12)&0x3ff) + 1 720 associativity := int((ebx>>22)&0x3ff) + 1 721 sets := int(ecx) + 1 722 size := associativity * partitions * coherency * sets 723 switch cacheLevel { 724 case 1: 725 if cacheType == 1 { 726 // 1 = Data Cache 727 c.Cache.L1D = size 728 } else if cacheType == 2 { 729 // 2 = Instruction Cache 730 c.Cache.L1I = size 731 } else { 732 if c.Cache.L1D < 0 { 733 c.Cache.L1I = size 734 } 735 if c.Cache.L1I < 0 { 736 c.Cache.L1I = size 737 } 738 } 739 case 2: 740 c.Cache.L2 = size 741 case 3: 742 c.Cache.L3 = size 743 } 744 } 745 case AMD: 746 // Untested. 
747 if maxExtendedFunction() < 0x80000005 { 748 return 749 } 750 _, _, ecx, edx := cpuid(0x80000005) 751 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 752 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 753 754 if maxExtendedFunction() < 0x80000006 { 755 return 756 } 757 _, _, ecx, _ = cpuid(0x80000006) 758 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 759 } 760 761 return 762} 763 764type SGXSupport struct { 765 Available bool 766 SGX1Supported bool 767 SGX2Supported bool 768 MaxEnclaveSizeNot64 int64 769 MaxEnclaveSize64 int64 770} 771 772func hasSGX(available bool) (rval SGXSupport) { 773 rval.Available = available 774 775 if !available { 776 return 777 } 778 779 a, _, _, d := cpuidex(0x12, 0) 780 rval.SGX1Supported = a&0x01 != 0 781 rval.SGX2Supported = a&0x02 != 0 782 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 783 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 784 785 return 786} 787 788func support() Flags { 789 mfi := maxFunctionID() 790 vend := vendorID() 791 if mfi < 0x1 { 792 return 0 793 } 794 rval := uint64(0) 795 _, _, c, d := cpuid(1) 796 if (d & (1 << 15)) != 0 { 797 rval |= CMOV 798 } 799 if (d & (1 << 23)) != 0 { 800 rval |= MMX 801 } 802 if (d & (1 << 25)) != 0 { 803 rval |= MMXEXT 804 } 805 if (d & (1 << 25)) != 0 { 806 rval |= SSE 807 } 808 if (d & (1 << 26)) != 0 { 809 rval |= SSE2 810 } 811 if (c & 1) != 0 { 812 rval |= SSE3 813 } 814 if (c & 0x00000200) != 0 { 815 rval |= SSSE3 816 } 817 if (c & 0x00080000) != 0 { 818 rval |= SSE4 819 } 820 if (c & 0x00100000) != 0 { 821 rval |= SSE42 822 } 823 if (c & (1 << 25)) != 0 { 824 rval |= AESNI 825 } 826 if (c & (1 << 1)) != 0 { 827 rval |= CLMUL 828 } 829 if c&(1<<23) != 0 { 830 rval |= POPCNT 831 } 832 if c&(1<<30) != 0 { 833 rval |= RDRAND 834 } 835 if c&(1<<29) != 0 { 836 rval |= F16C 837 } 838 if c&(1<<13) != 0 { 839 rval |= CX16 840 } 841 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 842 if threadsPerCore() > 1 { 843 rval |= HTT 844 } 845 } 846 847 // Check XGETBV, 
OXSAVE and AVX bits 848 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 849 // Check for OS support 850 eax, _ := xgetbv(0) 851 if (eax & 0x6) == 0x6 { 852 rval |= AVX 853 if (c & 0x00001000) != 0 { 854 rval |= FMA3 855 } 856 } 857 } 858 859 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 860 if mfi >= 7 { 861 _, ebx, ecx, edx := cpuidex(7, 0) 862 if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { 863 rval |= AVX2 864 } 865 if (ebx & 0x00000008) != 0 { 866 rval |= BMI1 867 if (ebx & 0x00000100) != 0 { 868 rval |= BMI2 869 } 870 } 871 if ebx&(1<<2) != 0 { 872 rval |= SGX 873 } 874 if ebx&(1<<4) != 0 { 875 rval |= HLE 876 } 877 if ebx&(1<<9) != 0 { 878 rval |= ERMS 879 } 880 if ebx&(1<<11) != 0 { 881 rval |= RTM 882 } 883 if ebx&(1<<14) != 0 { 884 rval |= MPX 885 } 886 if ebx&(1<<18) != 0 { 887 rval |= RDSEED 888 } 889 if ebx&(1<<19) != 0 { 890 rval |= ADX 891 } 892 if ebx&(1<<29) != 0 { 893 rval |= SHA 894 } 895 if edx&(1<<26) != 0 { 896 rval |= IBPB 897 } 898 if edx&(1<<27) != 0 { 899 rval |= STIBP 900 } 901 902 // Only detect AVX-512 features if XGETBV is supported 903 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 904 // Check for OS support 905 eax, _ := xgetbv(0) 906 907 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 908 // ZMM16-ZMM31 state are enabled by OS) 909 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
910 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 911 if ebx&(1<<16) != 0 { 912 rval |= AVX512F 913 } 914 if ebx&(1<<17) != 0 { 915 rval |= AVX512DQ 916 } 917 if ebx&(1<<21) != 0 { 918 rval |= AVX512IFMA 919 } 920 if ebx&(1<<26) != 0 { 921 rval |= AVX512PF 922 } 923 if ebx&(1<<27) != 0 { 924 rval |= AVX512ER 925 } 926 if ebx&(1<<28) != 0 { 927 rval |= AVX512CD 928 } 929 if ebx&(1<<30) != 0 { 930 rval |= AVX512BW 931 } 932 if ebx&(1<<31) != 0 { 933 rval |= AVX512VL 934 } 935 // ecx 936 if ecx&(1<<1) != 0 { 937 rval |= AVX512VBMI 938 } 939 } 940 } 941 } 942 943 if maxExtendedFunction() >= 0x80000001 { 944 _, _, c, d := cpuid(0x80000001) 945 if (c & (1 << 5)) != 0 { 946 rval |= LZCNT 947 rval |= POPCNT 948 } 949 if (d & (1 << 31)) != 0 { 950 rval |= AMD3DNOW 951 } 952 if (d & (1 << 30)) != 0 { 953 rval |= AMD3DNOWEXT 954 } 955 if (d & (1 << 23)) != 0 { 956 rval |= MMX 957 } 958 if (d & (1 << 22)) != 0 { 959 rval |= MMXEXT 960 } 961 if (c & (1 << 6)) != 0 { 962 rval |= SSE4A 963 } 964 if d&(1<<20) != 0 { 965 rval |= NX 966 } 967 if d&(1<<27) != 0 { 968 rval |= RDTSCP 969 } 970 971 /* Allow for selectively disabling SSE2 functions on AMD processors 972 with SSE2 support but not SSE4a. This includes Athlon64, some 973 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 974 than SSE2 often enough to utilize this special-case flag. 975 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 976 so that SSE2 is used unless explicitly disabled by checking 977 AV_CPU_FLAG_SSE2SLOW. */ 978 if vendorID() != Intel && 979 rval&SSE2 != 0 && (c&0x00000040) == 0 { 980 rval |= SSE2SLOW 981 } 982 983 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 984 * used unless the OS has AVX support. 
*/ 985 if (rval & AVX) != 0 { 986 if (c & 0x00000800) != 0 { 987 rval |= XOP 988 } 989 if (c & 0x00010000) != 0 { 990 rval |= FMA4 991 } 992 } 993 994 if vendorID() == Intel { 995 family, model := familyModel() 996 if family == 6 && (model == 9 || model == 13 || model == 14) { 997 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 998 * 6/14 (core1 "yonah") theoretically support sse2, but it's 999 * usually slower than mmx. */ 1000 if (rval & SSE2) != 0 { 1001 rval |= SSE2SLOW 1002 } 1003 if (rval & SSE3) != 0 { 1004 rval |= SSE3SLOW 1005 } 1006 } 1007 /* The Atom processor has SSSE3 support, which is useful in many cases, 1008 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 1009 * on the Atom, but is generally faster on other processors supporting 1010 * SSSE3. This flag allows for selectively disabling certain SSSE3 1011 * functions on the Atom. */ 1012 if family == 6 && model == 28 { 1013 rval |= ATOM 1014 } 1015 } 1016 } 1017 return Flags(rval) 1018} 1019 1020func valAsString(values ...uint32) []byte { 1021 r := make([]byte, 4*len(values)) 1022 for i, v := range values { 1023 dst := r[i*4:] 1024 dst[0] = byte(v & 0xff) 1025 dst[1] = byte((v >> 8) & 0xff) 1026 dst[2] = byte((v >> 16) & 0xff) 1027 dst[3] = byte((v >> 24) & 0xff) 1028 switch { 1029 case dst[0] == 0: 1030 return r[:i*4] 1031 case dst[1] == 0: 1032 return r[:i*4+1] 1033 case dst[2] == 0: 1034 return r[:i*4+2] 1035 case dst[3] == 0: 1036 return r[:i*4+3] 1037 } 1038 } 1039 return r 1040} 1041