// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import "strings"

// Vendor is a representation of a CPU vendor.
type Vendor int

// Vendors that can be identified from the CPUID vendor string.
// Other is the zero value and is what vendorID reports when the
// vendor string is not found in vendorMapping.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
)

// Feature and performance flags.
// Every constant occupies its own bit (1 << iota), so any combination
// of them can be stored in a single Flags value and tested with a mask.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)

// flagNames maps every feature bit to the name that Flags.Strings
// emits for it.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}

// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // Software Guard Extensions details, filled in by Detect
	maxFunc   uint32     // Highest standard CPUID function, EAX of cpuid(0)
	maxExFunc uint32     // Highest extended CPUID function, EAX of cpuid(0x80000000)
}

// Low-level accessors used by all detection code in this file.
// They are plain function variables and are never assigned here;
// presumably initCPU (not visible in this file) installs the
// platform-specific implementations - confirm against its definition.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data;
// feature queries on it read the values stored by Detect.
var CPU CPUInfo

// init runs initCPU (defined outside this file) and then performs the
// initial detection so that CPU is populated before any user code runs.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
188func Detect() { 189 CPU.maxFunc = maxFunctionID() 190 CPU.maxExFunc = maxExtendedFunction() 191 CPU.BrandName = brandName() 192 CPU.CacheLine = cacheLine() 193 CPU.Family, CPU.Model = familyModel() 194 CPU.Features = support() 195 CPU.SGX = sgx(CPU.Features&SGX != 0) 196 CPU.ThreadsPerCore = threadsPerCore() 197 CPU.LogicalCores = logicalCores() 198 CPU.PhysicalCores = physicalCores() 199 CPU.VendorID = vendorID() 200 CPU.cacheSize() 201} 202 203// Generated here: http://play.golang.org/p/BxFH2Gdc0G 204 205// Cmov indicates support of CMOV instructions 206func (c CPUInfo) Cmov() bool { 207 return c.Features&CMOV != 0 208} 209 210// Amd3dnow indicates support of AMD 3DNOW! instructions 211func (c CPUInfo) Amd3dnow() bool { 212 return c.Features&AMD3DNOW != 0 213} 214 215// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions 216func (c CPUInfo) Amd3dnowExt() bool { 217 return c.Features&AMD3DNOWEXT != 0 218} 219 220// MMX indicates support of MMX instructions 221func (c CPUInfo) MMX() bool { 222 return c.Features&MMX != 0 223} 224 225// MMXExt indicates support of MMXEXT instructions 226// (SSE integer functions or AMD MMX ext) 227func (c CPUInfo) MMXExt() bool { 228 return c.Features&MMXEXT != 0 229} 230 231// SSE indicates support of SSE instructions 232func (c CPUInfo) SSE() bool { 233 return c.Features&SSE != 0 234} 235 236// SSE2 indicates support of SSE 2 instructions 237func (c CPUInfo) SSE2() bool { 238 return c.Features&SSE2 != 0 239} 240 241// SSE3 indicates support of SSE 3 instructions 242func (c CPUInfo) SSE3() bool { 243 return c.Features&SSE3 != 0 244} 245 246// SSSE3 indicates support of SSSE 3 instructions 247func (c CPUInfo) SSSE3() bool { 248 return c.Features&SSSE3 != 0 249} 250 251// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 252func (c CPUInfo) SSE4() bool { 253 return c.Features&SSE4 != 0 254} 255 256// SSE42 indicates support of SSE4.2 instructions 257func (c CPUInfo) SSE42() bool { 258 return 
c.Features&SSE42 != 0 259} 260 261// AVX indicates support of AVX instructions 262// and operating system support of AVX instructions 263func (c CPUInfo) AVX() bool { 264 return c.Features&AVX != 0 265} 266 267// AVX2 indicates support of AVX2 instructions 268func (c CPUInfo) AVX2() bool { 269 return c.Features&AVX2 != 0 270} 271 272// FMA3 indicates support of FMA3 instructions 273func (c CPUInfo) FMA3() bool { 274 return c.Features&FMA3 != 0 275} 276 277// FMA4 indicates support of FMA4 instructions 278func (c CPUInfo) FMA4() bool { 279 return c.Features&FMA4 != 0 280} 281 282// XOP indicates support of XOP instructions 283func (c CPUInfo) XOP() bool { 284 return c.Features&XOP != 0 285} 286 287// F16C indicates support of F16C instructions 288func (c CPUInfo) F16C() bool { 289 return c.Features&F16C != 0 290} 291 292// BMI1 indicates support of BMI1 instructions 293func (c CPUInfo) BMI1() bool { 294 return c.Features&BMI1 != 0 295} 296 297// BMI2 indicates support of BMI2 instructions 298func (c CPUInfo) BMI2() bool { 299 return c.Features&BMI2 != 0 300} 301 302// TBM indicates support of TBM instructions 303// (AMD Trailing Bit Manipulation) 304func (c CPUInfo) TBM() bool { 305 return c.Features&TBM != 0 306} 307 308// Lzcnt indicates support of LZCNT instruction 309func (c CPUInfo) Lzcnt() bool { 310 return c.Features&LZCNT != 0 311} 312 313// Popcnt indicates support of POPCNT instruction 314func (c CPUInfo) Popcnt() bool { 315 return c.Features&POPCNT != 0 316} 317 318// HTT indicates the processor has Hyperthreading enabled 319func (c CPUInfo) HTT() bool { 320 return c.Features&HTT != 0 321} 322 323// SSE2Slow indicates that SSE2 may be slow on this processor 324func (c CPUInfo) SSE2Slow() bool { 325 return c.Features&SSE2SLOW != 0 326} 327 328// SSE3Slow indicates that SSE3 may be slow on this processor 329func (c CPUInfo) SSE3Slow() bool { 330 return c.Features&SSE3SLOW != 0 331} 332 333// AesNi indicates support of AES-NI instructions 334// (Advanced 
Encryption Standard New Instructions) 335func (c CPUInfo) AesNi() bool { 336 return c.Features&AESNI != 0 337} 338 339// Clmul indicates support of CLMUL instructions 340// (Carry-less Multiplication) 341func (c CPUInfo) Clmul() bool { 342 return c.Features&CLMUL != 0 343} 344 345// NX indicates support of NX (No-Execute) bit 346func (c CPUInfo) NX() bool { 347 return c.Features&NX != 0 348} 349 350// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 351func (c CPUInfo) SSE4A() bool { 352 return c.Features&SSE4A != 0 353} 354 355// HLE indicates support of Hardware Lock Elision 356func (c CPUInfo) HLE() bool { 357 return c.Features&HLE != 0 358} 359 360// RTM indicates support of Restricted Transactional Memory 361func (c CPUInfo) RTM() bool { 362 return c.Features&RTM != 0 363} 364 365// Rdrand indicates support of RDRAND instruction is available 366func (c CPUInfo) Rdrand() bool { 367 return c.Features&RDRAND != 0 368} 369 370// Rdseed indicates support of RDSEED instruction is available 371func (c CPUInfo) Rdseed() bool { 372 return c.Features&RDSEED != 0 373} 374 375// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 376func (c CPUInfo) ADX() bool { 377 return c.Features&ADX != 0 378} 379 380// SHA indicates support of Intel SHA Extensions 381func (c CPUInfo) SHA() bool { 382 return c.Features&SHA != 0 383} 384 385// AVX512F indicates support of AVX-512 Foundation 386func (c CPUInfo) AVX512F() bool { 387 return c.Features&AVX512F != 0 388} 389 390// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 391func (c CPUInfo) AVX512DQ() bool { 392 return c.Features&AVX512DQ != 0 393} 394 395// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 396func (c CPUInfo) AVX512IFMA() bool { 397 return c.Features&AVX512IFMA != 0 398} 399 400// AVX512PF indicates support of AVX-512 Prefetch Instructions 401func (c CPUInfo) AVX512PF() bool { 402 return 
c.Features&AVX512PF != 0 403} 404 405// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 406func (c CPUInfo) AVX512ER() bool { 407 return c.Features&AVX512ER != 0 408} 409 410// AVX512CD indicates support of AVX-512 Conflict Detection Instructions 411func (c CPUInfo) AVX512CD() bool { 412 return c.Features&AVX512CD != 0 413} 414 415// AVX512BW indicates support of AVX-512 Byte and Word Instructions 416func (c CPUInfo) AVX512BW() bool { 417 return c.Features&AVX512BW != 0 418} 419 420// AVX512VL indicates support of AVX-512 Vector Length Extensions 421func (c CPUInfo) AVX512VL() bool { 422 return c.Features&AVX512VL != 0 423} 424 425// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions 426func (c CPUInfo) AVX512VBMI() bool { 427 return c.Features&AVX512VBMI != 0 428} 429 430// MPX indicates support of Intel MPX (Memory Protection Extensions) 431func (c CPUInfo) MPX() bool { 432 return c.Features&MPX != 0 433} 434 435// ERMS indicates support of Enhanced REP MOVSB/STOSB 436func (c CPUInfo) ERMS() bool { 437 return c.Features&ERMS != 0 438} 439 440func (c CPUInfo) RDTSCP() bool { 441 return c.Features&RDTSCP != 0 442} 443 444func (c CPUInfo) CX16() bool { 445 return c.Features&CX16 != 0 446} 447 448// Atom indicates an Atom processor 449func (c CPUInfo) Atom() bool { 450 return c.Features&ATOM != 0 451} 452 453// Intel returns true if vendor is recognized as Intel 454func (c CPUInfo) Intel() bool { 455 return c.VendorID == Intel 456} 457 458// AMD returns true if vendor is recognized as AMD 459func (c CPUInfo) AMD() bool { 460 return c.VendorID == AMD 461} 462 463// Transmeta returns true if vendor is recognized as Transmeta 464func (c CPUInfo) Transmeta() bool { 465 return c.VendorID == Transmeta 466} 467 468// NSC returns true if vendor is recognized as National Semiconductor 469func (c CPUInfo) NSC() bool { 470 return c.VendorID == NSC 471} 472 473// VIA returns true if vendor is recognized as VIA 474func (c 
CPUInfo) VIA() bool { 475 return c.VendorID == VIA 476} 477 478// RTCounter returns the 64-bit time-stamp counter 479// Uses the RDTSCP instruction. The value 0 is returned 480// if the CPU does not support the instruction. 481func (c CPUInfo) RTCounter() uint64 { 482 if !c.RDTSCP() { 483 return 0 484 } 485 a, _, _, d := rdtscpAsm() 486 return uint64(a) | (uint64(d) << 32) 487} 488 489// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 490// This variable is OS dependent, but on Linux contains information 491// about the current cpu/core the code is running on. 492// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 493func (c CPUInfo) Ia32TscAux() uint32 { 494 if !c.RDTSCP() { 495 return 0 496 } 497 _, _, ecx, _ := rdtscpAsm() 498 return ecx 499} 500 501// LogicalCPU will return the Logical CPU the code is currently executing on. 502// This is likely to change when the OS re-schedules the running thread 503// to another CPU. 504// If the current core cannot be detected, -1 will be returned. 505func (c CPUInfo) LogicalCPU() int { 506 if c.maxFunc < 1 { 507 return -1 508 } 509 _, ebx, _, _ := cpuid(1) 510 return int(ebx >> 24) 511} 512 513// VM Will return true if the cpu id indicates we are in 514// a virtual machine. This is only a hint, and will very likely 515// have many false negatives. 516func (c CPUInfo) VM() bool { 517 switch c.VendorID { 518 case MSVM, KVM, VMware, XenHVM: 519 return true 520 } 521 return false 522} 523 524// Flags contains detected cpu features and caracteristics 525type Flags uint64 526 527// String returns a string representation of the detected 528// CPU features. 529func (f Flags) String() string { 530 return strings.Join(f.Strings(), ",") 531} 532 533// Strings returns and array of the detected features. 
534func (f Flags) Strings() []string { 535 s := support() 536 r := make([]string, 0, 20) 537 for i := uint(0); i < 64; i++ { 538 key := Flags(1 << i) 539 val := flagNames[key] 540 if s&key != 0 { 541 r = append(r, val) 542 } 543 } 544 return r 545} 546 547func maxExtendedFunction() uint32 { 548 eax, _, _, _ := cpuid(0x80000000) 549 return eax 550} 551 552func maxFunctionID() uint32 { 553 a, _, _, _ := cpuid(0) 554 return a 555} 556 557func brandName() string { 558 if maxExtendedFunction() >= 0x80000004 { 559 v := make([]uint32, 0, 48) 560 for i := uint32(0); i < 3; i++ { 561 a, b, c, d := cpuid(0x80000002 + i) 562 v = append(v, a, b, c, d) 563 } 564 return strings.Trim(string(valAsString(v...)), " ") 565 } 566 return "unknown" 567} 568 569func threadsPerCore() int { 570 mfi := maxFunctionID() 571 if mfi < 0x4 || vendorID() != Intel { 572 return 1 573 } 574 575 if mfi < 0xb { 576 _, b, _, d := cpuid(1) 577 if (d & (1 << 28)) != 0 { 578 // v will contain logical core count 579 v := (b >> 16) & 255 580 if v > 1 { 581 a4, _, _, _ := cpuid(4) 582 // physical cores 583 v2 := (a4 >> 26) + 1 584 if v2 > 0 { 585 return int(v) / int(v2) 586 } 587 } 588 } 589 return 1 590 } 591 _, b, _, _ := cpuidex(0xb, 0) 592 if b&0xffff == 0 { 593 return 1 594 } 595 return int(b & 0xffff) 596} 597 598func logicalCores() int { 599 mfi := maxFunctionID() 600 switch vendorID() { 601 case Intel: 602 // Use this on old Intel processors 603 if mfi < 0xb { 604 if mfi < 1 { 605 return 0 606 } 607 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 608 // that can be assigned to logical processors in a physical package. 609 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 
610 _, ebx, _, _ := cpuid(1) 611 logical := (ebx >> 16) & 0xff 612 return int(logical) 613 } 614 _, b, _, _ := cpuidex(0xb, 1) 615 return int(b & 0xffff) 616 case AMD: 617 _, b, _, _ := cpuid(1) 618 return int((b >> 16) & 0xff) 619 default: 620 return 0 621 } 622} 623 624func familyModel() (int, int) { 625 if maxFunctionID() < 0x1 { 626 return 0, 0 627 } 628 eax, _, _, _ := cpuid(1) 629 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 630 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 631 return int(family), int(model) 632} 633 634func physicalCores() int { 635 switch vendorID() { 636 case Intel: 637 return logicalCores() / threadsPerCore() 638 case AMD: 639 if maxExtendedFunction() >= 0x80000008 { 640 _, _, c, _ := cpuid(0x80000008) 641 return int(c&0xff) + 1 642 } 643 } 644 return 0 645} 646 647// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 648var vendorMapping = map[string]Vendor{ 649 "AMDisbetter!": AMD, 650 "AuthenticAMD": AMD, 651 "CentaurHauls": VIA, 652 "GenuineIntel": Intel, 653 "TransmetaCPU": Transmeta, 654 "GenuineTMx86": Transmeta, 655 "Geode by NSC": NSC, 656 "VIA VIA VIA ": VIA, 657 "KVMKVMKVMKVM": KVM, 658 "Microsoft Hv": MSVM, 659 "VMwareVMware": VMware, 660 "XenVMMXenVMM": XenHVM, 661} 662 663func vendorID() Vendor { 664 _, b, c, d := cpuid(0) 665 v := valAsString(b, d, c) 666 vend, ok := vendorMapping[string(v)] 667 if !ok { 668 return Other 669 } 670 return vend 671} 672 673func cacheLine() int { 674 if maxFunctionID() < 0x1 { 675 return 0 676 } 677 678 _, ebx, _, _ := cpuid(1) 679 cache := (ebx & 0xff00) >> 5 // cflush size 680 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 681 _, _, ecx, _ := cpuid(0x80000006) 682 cache = ecx & 0xff // cacheline size 683 } 684 // TODO: Read from Cache and TLB Information 685 return int(cache) 686} 687 688func (c *CPUInfo) cacheSize() { 689 c.Cache.L1D = -1 690 c.Cache.L1I = -1 691 c.Cache.L2 = -1 692 c.Cache.L3 = -1 693 vendor := vendorID() 694 switch vendor { 695 case 
Intel: 696 if maxFunctionID() < 4 { 697 return 698 } 699 for i := uint32(0); ; i++ { 700 eax, ebx, ecx, _ := cpuidex(4, i) 701 cacheType := eax & 15 702 if cacheType == 0 { 703 break 704 } 705 cacheLevel := (eax >> 5) & 7 706 coherency := int(ebx&0xfff) + 1 707 partitions := int((ebx>>12)&0x3ff) + 1 708 associativity := int((ebx>>22)&0x3ff) + 1 709 sets := int(ecx) + 1 710 size := associativity * partitions * coherency * sets 711 switch cacheLevel { 712 case 1: 713 if cacheType == 1 { 714 // 1 = Data Cache 715 c.Cache.L1D = size 716 } else if cacheType == 2 { 717 // 2 = Instruction Cache 718 c.Cache.L1I = size 719 } else { 720 if c.Cache.L1D < 0 { 721 c.Cache.L1I = size 722 } 723 if c.Cache.L1I < 0 { 724 c.Cache.L1I = size 725 } 726 } 727 case 2: 728 c.Cache.L2 = size 729 case 3: 730 c.Cache.L3 = size 731 } 732 } 733 case AMD: 734 // Untested. 735 if maxExtendedFunction() < 0x80000005 { 736 return 737 } 738 _, _, ecx, edx := cpuid(0x80000005) 739 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 740 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 741 742 if maxExtendedFunction() < 0x80000006 { 743 return 744 } 745 _, _, ecx, _ = cpuid(0x80000006) 746 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 747 } 748 749 return 750} 751 752type SGXSupport struct { 753 Available bool 754 SGX1Supported bool 755 SGX2Supported bool 756 MaxEnclaveSizeNot64 int64 757 MaxEnclaveSize64 int64 758} 759 760func sgx(available bool) (rval SGXSupport) { 761 rval.Available = available 762 763 if !available { 764 return 765 } 766 767 a, _, _, d := cpuidex(0x12, 0) 768 rval.SGX1Supported = a&0x01 != 0 769 rval.SGX2Supported = a&0x02 != 0 770 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 771 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 772 773 return 774} 775 776func support() Flags { 777 mfi := maxFunctionID() 778 vend := vendorID() 779 if mfi < 0x1 { 780 return 0 781 } 782 rval := uint64(0) 783 _, _, c, d := cpuid(1) 784 if (d & (1 << 15)) != 0 { 785 rval |= CMOV 786 } 787 if 
(d & (1 << 23)) != 0 { 788 rval |= MMX 789 } 790 if (d & (1 << 25)) != 0 { 791 rval |= MMXEXT 792 } 793 if (d & (1 << 25)) != 0 { 794 rval |= SSE 795 } 796 if (d & (1 << 26)) != 0 { 797 rval |= SSE2 798 } 799 if (c & 1) != 0 { 800 rval |= SSE3 801 } 802 if (c & 0x00000200) != 0 { 803 rval |= SSSE3 804 } 805 if (c & 0x00080000) != 0 { 806 rval |= SSE4 807 } 808 if (c & 0x00100000) != 0 { 809 rval |= SSE42 810 } 811 if (c & (1 << 25)) != 0 { 812 rval |= AESNI 813 } 814 if (c & (1 << 1)) != 0 { 815 rval |= CLMUL 816 } 817 if c&(1<<23) != 0 { 818 rval |= POPCNT 819 } 820 if c&(1<<30) != 0 { 821 rval |= RDRAND 822 } 823 if c&(1<<29) != 0 { 824 rval |= F16C 825 } 826 if c&(1<<13) != 0 { 827 rval |= CX16 828 } 829 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 830 if threadsPerCore() > 1 { 831 rval |= HTT 832 } 833 } 834 835 // Check XGETBV, OXSAVE and AVX bits 836 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 837 // Check for OS support 838 eax, _ := xgetbv(0) 839 if (eax & 0x6) == 0x6 { 840 rval |= AVX 841 if (c & 0x00001000) != 0 { 842 rval |= FMA3 843 } 844 } 845 } 846 847 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 
848 if mfi >= 7 { 849 _, ebx, ecx, _ := cpuidex(7, 0) 850 if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { 851 rval |= AVX2 852 } 853 if (ebx & 0x00000008) != 0 { 854 rval |= BMI1 855 if (ebx & 0x00000100) != 0 { 856 rval |= BMI2 857 } 858 } 859 if ebx&(1<<2) != 0 { 860 rval |= SGX 861 } 862 if ebx&(1<<4) != 0 { 863 rval |= HLE 864 } 865 if ebx&(1<<9) != 0 { 866 rval |= ERMS 867 } 868 if ebx&(1<<11) != 0 { 869 rval |= RTM 870 } 871 if ebx&(1<<14) != 0 { 872 rval |= MPX 873 } 874 if ebx&(1<<18) != 0 { 875 rval |= RDSEED 876 } 877 if ebx&(1<<19) != 0 { 878 rval |= ADX 879 } 880 if ebx&(1<<29) != 0 { 881 rval |= SHA 882 } 883 884 // Only detect AVX-512 features if XGETBV is supported 885 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 886 // Check for OS support 887 eax, _ := xgetbv(0) 888 889 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 890 // ZMM16-ZMM31 state are enabled by OS) 891 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 892 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 893 if ebx&(1<<16) != 0 { 894 rval |= AVX512F 895 } 896 if ebx&(1<<17) != 0 { 897 rval |= AVX512DQ 898 } 899 if ebx&(1<<21) != 0 { 900 rval |= AVX512IFMA 901 } 902 if ebx&(1<<26) != 0 { 903 rval |= AVX512PF 904 } 905 if ebx&(1<<27) != 0 { 906 rval |= AVX512ER 907 } 908 if ebx&(1<<28) != 0 { 909 rval |= AVX512CD 910 } 911 if ebx&(1<<30) != 0 { 912 rval |= AVX512BW 913 } 914 if ebx&(1<<31) != 0 { 915 rval |= AVX512VL 916 } 917 // ecx 918 if ecx&(1<<1) != 0 { 919 rval |= AVX512VBMI 920 } 921 } 922 } 923 } 924 925 if maxExtendedFunction() >= 0x80000001 { 926 _, _, c, d := cpuid(0x80000001) 927 if (c & (1 << 5)) != 0 { 928 rval |= LZCNT 929 rval |= POPCNT 930 } 931 if (d & (1 << 31)) != 0 { 932 rval |= AMD3DNOW 933 } 934 if (d & (1 << 30)) != 0 { 935 rval |= AMD3DNOWEXT 936 } 937 if (d & (1 << 23)) != 0 { 938 rval |= MMX 939 } 940 if (d & (1 << 22)) != 0 { 941 rval |= MMXEXT 942 } 943 if (c & (1 << 6)) != 0 { 944 rval |= SSE4A 945 } 946 if 
d&(1<<20) != 0 { 947 rval |= NX 948 } 949 if d&(1<<27) != 0 { 950 rval |= RDTSCP 951 } 952 953 /* Allow for selectively disabling SSE2 functions on AMD processors 954 with SSE2 support but not SSE4a. This includes Athlon64, some 955 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 956 than SSE2 often enough to utilize this special-case flag. 957 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 958 so that SSE2 is used unless explicitly disabled by checking 959 AV_CPU_FLAG_SSE2SLOW. */ 960 if vendorID() != Intel && 961 rval&SSE2 != 0 && (c&0x00000040) == 0 { 962 rval |= SSE2SLOW 963 } 964 965 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 966 * used unless the OS has AVX support. */ 967 if (rval & AVX) != 0 { 968 if (c & 0x00000800) != 0 { 969 rval |= XOP 970 } 971 if (c & 0x00010000) != 0 { 972 rval |= FMA4 973 } 974 } 975 976 if vendorID() == Intel { 977 family, model := familyModel() 978 if family == 6 && (model == 9 || model == 13 || model == 14) { 979 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 980 * 6/14 (core1 "yonah") theoretically support sse2, but it's 981 * usually slower than mmx. */ 982 if (rval & SSE2) != 0 { 983 rval |= SSE2SLOW 984 } 985 if (rval & SSE3) != 0 { 986 rval |= SSE3SLOW 987 } 988 } 989 /* The Atom processor has SSSE3 support, which is useful in many cases, 990 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 991 * on the Atom, but is generally faster on other processors supporting 992 * SSSE3. This flag allows for selectively disabling certain SSSE3 993 * functions on the Atom. 
*/ 994 if family == 6 && model == 28 { 995 rval |= ATOM 996 } 997 } 998 } 999 return Flags(rval) 1000} 1001 1002func valAsString(values ...uint32) []byte { 1003 r := make([]byte, 4*len(values)) 1004 for i, v := range values { 1005 dst := r[i*4:] 1006 dst[0] = byte(v & 0xff) 1007 dst[1] = byte((v >> 8) & 0xff) 1008 dst[2] = byte((v >> 16) & 0xff) 1009 dst[3] = byte((v >> 24) & 0xff) 1010 switch { 1011 case dst[0] == 0: 1012 return r[:i*4] 1013 case dst[1] == 0: 1014 return r[:i*4+1] 1015 case dst[2] == 0: 1016 return r[:i*4+2] 1017 case dst[3] == 0: 1018 return r[:i*4+3] 1019 } 1020 } 1021 return r 1022} 1023