// Generated, DO NOT EDIT,
// but copy it to your own project and rename the package.
// See more at http://github.com/klauspost/cpuid

package cpuid

import "strings"

// vendor is a representation of a CPU vendor.
type vendor int

// Known vendor IDs. other is the zero value and is what vendorID returns
// when the CPUID vendor string has no entry in vendorMapping.
const (
	other vendor = iota
	intel
	amd
	via
	transmeta
	nsc
	kvm  // Kernel-based Virtual Machine
	msvm // Microsoft Hyper-V or Windows Virtual PC
	vmware
	xenhvm
	bhyve
	hygon
)

// Feature bits. Every constant is a distinct power of two (1 << iota), so
// they can be OR-ed together into a flags value. The order is significant:
// it fixes the bit position of every feature.
const (
	cmov               = 1 << iota // i686 CMOV
	nx                             // NX (No-Execute) bit
	amd3dnow                       // AMD 3DNOW
	amd3dnowext                    // AMD 3DNowExt
	mmx                            // standard MMX
	mmxext                         // SSE integer functions or AMD MMX ext
	sse                            // SSE functions
	sse2                           // P4 SSE functions
	sse3                           // Prescott SSE3 functions
	ssse3                          // Conroe SSSE3 functions
	sse4                           // Penryn SSE4.1 functions
	sse4a                          // AMD Barcelona microarchitecture SSE4a instructions
	sse42                          // Nehalem SSE4.2 functions
	avx                            // AVX functions
	avx2                           // AVX2 functions
	fma3                           // Intel FMA 3
	fma4                           // Bulldozer FMA4 functions
	xop                            // Bulldozer XOP functions
	f16c                           // Half-precision floating-point conversion
	bmi1                           // Bit Manipulation Instruction Set 1
	bmi2                           // Bit Manipulation Instruction Set 2
	tbm                            // AMD Trailing Bit Manipulation
	lzcnt                          // LZCNT instruction
	popcnt                         // POPCNT instruction
	aesni                          // Advanced Encryption Standard New Instructions
	clmul                          // Carry-less Multiplication
	htt                            // Hyperthreading (enabled)
	hle                            // Hardware Lock Elision
	rtm                            // Restricted Transactional Memory
	rdrand                         // RDRAND instruction is available
	rdseed                         // RDSEED instruction is available
	adx                            // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha                            // Intel SHA Extensions
	avx512f                        // AVX-512 Foundation
	avx512dq                       // AVX-512 Doubleword and Quadword Instructions
	avx512ifma                     // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf                       // AVX-512 Prefetch Instructions
	avx512er                       // AVX-512 Exponential and Reciprocal Instructions
	avx512cd                       // AVX-512 Conflict Detection Instructions
	avx512bw                       // AVX-512 Byte and Word Instructions
	avx512vl                       // AVX-512 Vector Length Extensions
	avx512vbmi                     // AVX-512 Vector Bit Manipulation Instructions
	avx512vbmi2                    // AVX-512 Vector Bit Manipulation Instructions, Version 2
	avx512vnni                     // AVX-512 Vector Neural Network Instructions
	avx512vpopcntdq                // AVX-512 Vector Population Count Doubleword and Quadword
	gfni                           // Galois Field New Instructions
	vaes                           // Vector AES
	avx512bitalg                   // AVX-512 Bit Algorithms
	vpclmulqdq                     // Carry-Less Multiplication Quadword
	avx512bf16                     // AVX-512 BFLOAT16 Instructions
	avx512vp2intersect             // AVX-512 Intersect for D/Q
	mpx                            // Intel MPX (Memory Protection Extensions)
	erms                           // Enhanced REP MOVSB/STOSB
	rdtscp                         // RDTSCP Instruction
	cx16                           // CMPXCHG16B Instruction
	sgx                            // Software Guard Extensions
	sgxlc                          // Software Guard Extensions Launch Control
	ibpb                           // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	stibp                          // Single Thread Indirect Branch Predictors
	vmx                            // Virtual Machine Extensions

	// Performance indicators
	sse2slow // SSE2 is supported, but usually not faster
	sse3slow // SSE3 is supported, but usually not faster
	atom     // Atom processor, some SSSE3 instructions are slower
)

// flagNames maps every feature bit to the name used when a flags value is
// converted to strings.
var flagNames = map[flags]string{
	cmov:               "CMOV",               // i686 CMOV
	nx:                 "NX",                 // NX (No-Execute) bit
	amd3dnow:           "AMD3DNOW",           // AMD 3DNOW
	amd3dnowext:        "AMD3DNOWEXT",        // AMD 3DNowExt
	mmx:                "MMX",                // Standard MMX
	mmxext:             "MMXEXT",             // SSE integer functions or AMD MMX ext
	sse:                "SSE",                // SSE functions
	sse2:               "SSE2",               // P4 SSE2 functions
	sse3:               "SSE3",               // Prescott SSE3 functions
	ssse3:              "SSSE3",              // Conroe SSSE3 functions
	sse4:               "SSE4.1",             // Penryn SSE4.1 functions
	sse4a:              "SSE4A",              // AMD Barcelona microarchitecture SSE4a instructions
	sse42:              "SSE4.2",             // Nehalem SSE4.2 functions
	avx:                "AVX",                // AVX functions
	avx2:               "AVX2",               // AVX2 functions
	fma3:               "FMA3",               // Intel FMA 3
	fma4:               "FMA4",               // Bulldozer FMA4 functions
	xop:                "XOP",                // Bulldozer XOP functions
	f16c:               "F16C",               // Half-precision floating-point conversion
	bmi1:               "BMI1",               // Bit Manipulation Instruction Set 1
	bmi2:               "BMI2",               // Bit Manipulation Instruction Set 2
	tbm:                "TBM",                // AMD Trailing Bit Manipulation
	lzcnt:              "LZCNT",              // LZCNT instruction
	popcnt:             "POPCNT",             // POPCNT instruction
	aesni:              "AESNI",              // Advanced Encryption Standard New Instructions
	clmul:              "CLMUL",              // Carry-less Multiplication
	htt:                "HTT",                // Hyperthreading (enabled)
	hle:                "HLE",                // Hardware Lock Elision
	rtm:                "RTM",                // Restricted Transactional Memory
	rdrand:             "RDRAND",             // RDRAND instruction is available
	rdseed:             "RDSEED",             // RDSEED instruction is available
	adx:                "ADX",                // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha:                "SHA",                // Intel SHA Extensions
	avx512f:            "AVX512F",            // AVX-512 Foundation
	avx512dq:           "AVX512DQ",           // AVX-512 Doubleword and Quadword Instructions
	avx512ifma:         "AVX512IFMA",         // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf:           "AVX512PF",           // AVX-512 Prefetch Instructions
	avx512er:           "AVX512ER",           // AVX-512 Exponential and Reciprocal Instructions
	avx512cd:           "AVX512CD",           // AVX-512 Conflict Detection Instructions
	avx512bw:           "AVX512BW",           // AVX-512 Byte and Word Instructions
	avx512vl:           "AVX512VL",           // AVX-512 Vector Length Extensions
	avx512vbmi:         "AVX512VBMI",         // AVX-512 Vector Bit Manipulation Instructions
	avx512vbmi2:        "AVX512VBMI2",        // AVX-512 Vector Bit Manipulation Instructions, Version 2
	avx512vnni:         "AVX512VNNI",         // AVX-512 Vector Neural Network Instructions
	avx512vpopcntdq:    "AVX512VPOPCNTDQ",    // AVX-512 Vector Population Count Doubleword and Quadword
	gfni:               "GFNI",               // Galois Field New Instructions
	vaes:               "VAES",               // Vector AES
	avx512bitalg:       "AVX512BITALG",       // AVX-512 Bit Algorithms
	vpclmulqdq:         "VPCLMULQDQ",         // Carry-Less Multiplication Quadword
	avx512bf16:         "AVX512BF16",         // AVX-512 BFLOAT16 Instructions
	avx512vp2intersect: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
	mpx:                "MPX",                // Intel MPX (Memory Protection Extensions)
	erms:               "ERMS",               // Enhanced REP MOVSB/STOSB
	rdtscp:             "RDTSCP",             // RDTSCP Instruction
	cx16:               "CX16",               // CMPXCHG16B Instruction
	sgx:                "SGX",                // Software Guard Extensions
	sgxlc:              "SGXLC",              // Software Guard Extensions Launch Control
	ibpb:               "IBPB",               // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	stibp:              "STIBP",              // Single Thread Indirect Branch Predictors
	vmx:                "VMX",                // Virtual Machine Extensions

	// Performance indicators
	sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
	sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
	atom:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}

// cpuInfo contains information about the detected system CPU.
// All fields are filled in by detect.
type cpuInfo struct {
	brandname      string // Brand name reported by the CPU
	vendorid       vendor // Comparable CPU vendor ID
	features       flags  // Features of the CPU
	physicalcores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	threadspercore int    // Number of threads per physical core. Will be 1 if undetectable.
	logicalcores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	family         int    // CPU family number
	model          int    // CPU model number
	cacheline      int    // Cache line size in bytes. Will be 0 if undetectable.
	cache          struct {
		l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
		l2  int // L2 Cache (per core or shared). Will be -1 if undetected
		l3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	sgx       sgxsupport // SGX details as returned by hasSGX
	maxFunc   uint32     // Value returned by maxFunctionID
	maxExFunc uint32     // Value returned by maxExtendedFunction
}

// Low-level instruction hooks. They are declared without a value here;
// presumably initCPU (not defined in this section) assigns the
// platform-specific implementations before detect runs — confirm there.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// cpu contains information about the CPU as detected on startup,
// or when detect last was called.
//
// Use this as the primary entry point to your data.
var cpu cpuInfo

// init runs initCPU and then fills the package-level cpu variable via detect.
func init() {
	initCPU()
	detect()
}

// detect will re-detect current CPU info.
// This will replace the content of the package-level cpu variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// cpu variable.
func detect() {
	cpu.maxFunc = maxFunctionID()
	cpu.maxExFunc = maxExtendedFunction()
	cpu.brandname = brandName()
	cpu.cacheline = cacheLine()
	cpu.family, cpu.model = familyModel()
	cpu.features = support()
	// Must run after cpu.features has been assigned: it reads the sgx bits.
	cpu.sgx = hasSGX(cpu.features&sgx != 0, cpu.features&sgxlc != 0)
	cpu.threadspercore = threadsPerCore()
	cpu.logicalcores = logicalCores()
	cpu.physicalcores = physicalCores()
	cpu.vendorid = vendorID()
	cpu.cacheSize()
}

// Generated here: http://play.golang.org/p/BxFH2Gdc0G

// cmov indicates support of CMOV instructions
func (c cpuInfo) cmov() bool {
	return c.features&cmov != 0
}

// amd3dnow indicates support of AMD 3DNOW! instructions
func (c cpuInfo) amd3dnow() bool {
	return c.features&amd3dnow != 0
}

// amd3dnowext indicates support of AMD 3DNOW!
Extended instructions 238func (c cpuInfo) amd3dnowext() bool { 239 return c.features&amd3dnowext != 0 240} 241 242// VMX indicates support of VMX 243func (c cpuInfo) vmx() bool { 244 return c.features&vmx != 0 245} 246 247// MMX indicates support of MMX instructions 248func (c cpuInfo) mmx() bool { 249 return c.features&mmx != 0 250} 251 252// MMXExt indicates support of MMXEXT instructions 253// (SSE integer functions or AMD MMX ext) 254func (c cpuInfo) mmxext() bool { 255 return c.features&mmxext != 0 256} 257 258// SSE indicates support of SSE instructions 259func (c cpuInfo) sse() bool { 260 return c.features&sse != 0 261} 262 263// SSE2 indicates support of SSE 2 instructions 264func (c cpuInfo) sse2() bool { 265 return c.features&sse2 != 0 266} 267 268// SSE3 indicates support of SSE 3 instructions 269func (c cpuInfo) sse3() bool { 270 return c.features&sse3 != 0 271} 272 273// SSSE3 indicates support of SSSE 3 instructions 274func (c cpuInfo) ssse3() bool { 275 return c.features&ssse3 != 0 276} 277 278// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 279func (c cpuInfo) sse4() bool { 280 return c.features&sse4 != 0 281} 282 283// SSE42 indicates support of SSE4.2 instructions 284func (c cpuInfo) sse42() bool { 285 return c.features&sse42 != 0 286} 287 288// AVX indicates support of AVX instructions 289// and operating system support of AVX instructions 290func (c cpuInfo) avx() bool { 291 return c.features&avx != 0 292} 293 294// AVX2 indicates support of AVX2 instructions 295func (c cpuInfo) avx2() bool { 296 return c.features&avx2 != 0 297} 298 299// FMA3 indicates support of FMA3 instructions 300func (c cpuInfo) fma3() bool { 301 return c.features&fma3 != 0 302} 303 304// FMA4 indicates support of FMA4 instructions 305func (c cpuInfo) fma4() bool { 306 return c.features&fma4 != 0 307} 308 309// XOP indicates support of XOP instructions 310func (c cpuInfo) xop() bool { 311 return c.features&xop != 0 312} 313 314// F16C indicates support 
of F16C instructions 315func (c cpuInfo) f16c() bool { 316 return c.features&f16c != 0 317} 318 319// BMI1 indicates support of BMI1 instructions 320func (c cpuInfo) bmi1() bool { 321 return c.features&bmi1 != 0 322} 323 324// BMI2 indicates support of BMI2 instructions 325func (c cpuInfo) bmi2() bool { 326 return c.features&bmi2 != 0 327} 328 329// TBM indicates support of TBM instructions 330// (AMD Trailing Bit Manipulation) 331func (c cpuInfo) tbm() bool { 332 return c.features&tbm != 0 333} 334 335// Lzcnt indicates support of LZCNT instruction 336func (c cpuInfo) lzcnt() bool { 337 return c.features&lzcnt != 0 338} 339 340// Popcnt indicates support of POPCNT instruction 341func (c cpuInfo) popcnt() bool { 342 return c.features&popcnt != 0 343} 344 345// HTT indicates the processor has Hyperthreading enabled 346func (c cpuInfo) htt() bool { 347 return c.features&htt != 0 348} 349 350// SSE2Slow indicates that SSE2 may be slow on this processor 351func (c cpuInfo) sse2slow() bool { 352 return c.features&sse2slow != 0 353} 354 355// SSE3Slow indicates that SSE3 may be slow on this processor 356func (c cpuInfo) sse3slow() bool { 357 return c.features&sse3slow != 0 358} 359 360// AesNi indicates support of AES-NI instructions 361// (Advanced Encryption Standard New Instructions) 362func (c cpuInfo) aesni() bool { 363 return c.features&aesni != 0 364} 365 366// Clmul indicates support of CLMUL instructions 367// (Carry-less Multiplication) 368func (c cpuInfo) clmul() bool { 369 return c.features&clmul != 0 370} 371 372// NX indicates support of NX (No-Execute) bit 373func (c cpuInfo) nx() bool { 374 return c.features&nx != 0 375} 376 377// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 378func (c cpuInfo) sse4a() bool { 379 return c.features&sse4a != 0 380} 381 382// HLE indicates support of Hardware Lock Elision 383func (c cpuInfo) hle() bool { 384 return c.features&hle != 0 385} 386 387// RTM indicates support of Restricted 
Transactional Memory 388func (c cpuInfo) rtm() bool { 389 return c.features&rtm != 0 390} 391 392// Rdrand indicates support of RDRAND instruction is available 393func (c cpuInfo) rdrand() bool { 394 return c.features&rdrand != 0 395} 396 397// Rdseed indicates support of RDSEED instruction is available 398func (c cpuInfo) rdseed() bool { 399 return c.features&rdseed != 0 400} 401 402// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 403func (c cpuInfo) adx() bool { 404 return c.features&adx != 0 405} 406 407// SHA indicates support of Intel SHA Extensions 408func (c cpuInfo) sha() bool { 409 return c.features&sha != 0 410} 411 412// AVX512F indicates support of AVX-512 Foundation 413func (c cpuInfo) avx512f() bool { 414 return c.features&avx512f != 0 415} 416 417// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 418func (c cpuInfo) avx512dq() bool { 419 return c.features&avx512dq != 0 420} 421 422// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 423func (c cpuInfo) avx512ifma() bool { 424 return c.features&avx512ifma != 0 425} 426 427// AVX512PF indicates support of AVX-512 Prefetch Instructions 428func (c cpuInfo) avx512pf() bool { 429 return c.features&avx512pf != 0 430} 431 432// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 433func (c cpuInfo) avx512er() bool { 434 return c.features&avx512er != 0 435} 436 437// AVX512CD indicates support of AVX-512 Conflict Detection Instructions 438func (c cpuInfo) avx512cd() bool { 439 return c.features&avx512cd != 0 440} 441 442// AVX512BW indicates support of AVX-512 Byte and Word Instructions 443func (c cpuInfo) avx512bw() bool { 444 return c.features&avx512bw != 0 445} 446 447// AVX512VL indicates support of AVX-512 Vector Length Extensions 448func (c cpuInfo) avx512vl() bool { 449 return c.features&avx512vl != 0 450} 451 452// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation 
Instructions 453func (c cpuInfo) avx512vbmi() bool { 454 return c.features&avx512vbmi != 0 455} 456 457// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2 458func (c cpuInfo) avx512vbmi2() bool { 459 return c.features&avx512vbmi2 != 0 460} 461 462// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions 463func (c cpuInfo) avx512vnni() bool { 464 return c.features&avx512vnni != 0 465} 466 467// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword 468func (c cpuInfo) avx512vpopcntdq() bool { 469 return c.features&avx512vpopcntdq != 0 470} 471 472// GFNI indicates support of Galois Field New Instructions 473func (c cpuInfo) gfni() bool { 474 return c.features&gfni != 0 475} 476 477// VAES indicates support of Vector AES 478func (c cpuInfo) vaes() bool { 479 return c.features&vaes != 0 480} 481 482// AVX512BITALG indicates support of AVX-512 Bit Algorithms 483func (c cpuInfo) avx512bitalg() bool { 484 return c.features&avx512bitalg != 0 485} 486 487// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword 488func (c cpuInfo) vpclmulqdq() bool { 489 return c.features&vpclmulqdq != 0 490} 491 492// AVX512BF16 indicates support of 493func (c cpuInfo) avx512bf16() bool { 494 return c.features&avx512bf16 != 0 495} 496 497// AVX512VP2INTERSECT indicates support of 498func (c cpuInfo) avx512vp2intersect() bool { 499 return c.features&avx512vp2intersect != 0 500} 501 502// MPX indicates support of Intel MPX (Memory Protection Extensions) 503func (c cpuInfo) mpx() bool { 504 return c.features&mpx != 0 505} 506 507// ERMS indicates support of Enhanced REP MOVSB/STOSB 508func (c cpuInfo) erms() bool { 509 return c.features&erms != 0 510} 511 512// RDTSCP Instruction is available. 513func (c cpuInfo) rdtscp() bool { 514 return c.features&rdtscp != 0 515} 516 517// CX16 indicates if CMPXCHG16B instruction is available. 
518func (c cpuInfo) cx16() bool { 519 return c.features&cx16 != 0 520} 521 522// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. 523// So TSX simply checks that. 524func (c cpuInfo) tsx() bool { 525 return c.features&(hle|rtm) == hle|rtm 526} 527 528// Atom indicates an Atom processor 529func (c cpuInfo) atom() bool { 530 return c.features&atom != 0 531} 532 533// Intel returns true if vendor is recognized as Intel 534func (c cpuInfo) intel() bool { 535 return c.vendorid == intel 536} 537 538// AMD returns true if vendor is recognized as AMD 539func (c cpuInfo) amd() bool { 540 return c.vendorid == amd 541} 542 543// Hygon returns true if vendor is recognized as Hygon 544func (c cpuInfo) hygon() bool { 545 return c.vendorid == hygon 546} 547 548// Transmeta returns true if vendor is recognized as Transmeta 549func (c cpuInfo) transmeta() bool { 550 return c.vendorid == transmeta 551} 552 553// NSC returns true if vendor is recognized as National Semiconductor 554func (c cpuInfo) nsc() bool { 555 return c.vendorid == nsc 556} 557 558// VIA returns true if vendor is recognized as VIA 559func (c cpuInfo) via() bool { 560 return c.vendorid == via 561} 562 563// RTCounter returns the 64-bit time-stamp counter 564// Uses the RDTSCP instruction. The value 0 is returned 565// if the CPU does not support the instruction. 566func (c cpuInfo) rtcounter() uint64 { 567 if !c.rdtscp() { 568 return 0 569 } 570 a, _, _, d := rdtscpAsm() 571 return uint64(a) | (uint64(d) << 32) 572} 573 574// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 575// This variable is OS dependent, but on Linux contains information 576// about the current cpu/core the code is running on. 577// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 
578func (c cpuInfo) ia32tscaux() uint32 { 579 if !c.rdtscp() { 580 return 0 581 } 582 _, _, ecx, _ := rdtscpAsm() 583 return ecx 584} 585 586// LogicalCPU will return the Logical CPU the code is currently executing on. 587// This is likely to change when the OS re-schedules the running thread 588// to another CPU. 589// If the current core cannot be detected, -1 will be returned. 590func (c cpuInfo) logicalcpu() int { 591 if c.maxFunc < 1 { 592 return -1 593 } 594 _, ebx, _, _ := cpuid(1) 595 return int(ebx >> 24) 596} 597 598// VM Will return true if the cpu id indicates we are in 599// a virtual machine. This is only a hint, and will very likely 600// have many false negatives. 601func (c cpuInfo) vm() bool { 602 switch c.vendorid { 603 case msvm, kvm, vmware, xenhvm, bhyve: 604 return true 605 } 606 return false 607} 608 609// Flags contains detected cpu features and caracteristics 610type flags uint64 611 612// String returns a string representation of the detected 613// CPU features. 614func (f flags) String() string { 615 return strings.Join(f.strings(), ",") 616} 617 618// Strings returns and array of the detected features. 
619func (f flags) strings() []string { 620 s := support() 621 r := make([]string, 0, 20) 622 for i := uint(0); i < 64; i++ { 623 key := flags(1 << i) 624 val := flagNames[key] 625 if s&key != 0 { 626 r = append(r, val) 627 } 628 } 629 return r 630} 631 632func maxExtendedFunction() uint32 { 633 eax, _, _, _ := cpuid(0x80000000) 634 return eax 635} 636 637func maxFunctionID() uint32 { 638 a, _, _, _ := cpuid(0) 639 return a 640} 641 642func brandName() string { 643 if maxExtendedFunction() >= 0x80000004 { 644 v := make([]uint32, 0, 48) 645 for i := uint32(0); i < 3; i++ { 646 a, b, c, d := cpuid(0x80000002 + i) 647 v = append(v, a, b, c, d) 648 } 649 return strings.Trim(string(valAsString(v...)), " ") 650 } 651 return "unknown" 652} 653 654func threadsPerCore() int { 655 mfi := maxFunctionID() 656 if mfi < 0x4 || vendorID() != intel { 657 return 1 658 } 659 660 if mfi < 0xb { 661 _, b, _, d := cpuid(1) 662 if (d & (1 << 28)) != 0 { 663 // v will contain logical core count 664 v := (b >> 16) & 255 665 if v > 1 { 666 a4, _, _, _ := cpuid(4) 667 // physical cores 668 v2 := (a4 >> 26) + 1 669 if v2 > 0 { 670 return int(v) / int(v2) 671 } 672 } 673 } 674 return 1 675 } 676 _, b, _, _ := cpuidex(0xb, 0) 677 if b&0xffff == 0 { 678 return 1 679 } 680 return int(b & 0xffff) 681} 682 683func logicalCores() int { 684 mfi := maxFunctionID() 685 switch vendorID() { 686 case intel: 687 // Use this on old Intel processors 688 if mfi < 0xb { 689 if mfi < 1 { 690 return 0 691 } 692 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 693 // that can be assigned to logical processors in a physical package. 694 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 
695 _, ebx, _, _ := cpuid(1) 696 logical := (ebx >> 16) & 0xff 697 return int(logical) 698 } 699 _, b, _, _ := cpuidex(0xb, 1) 700 return int(b & 0xffff) 701 case amd, hygon: 702 _, b, _, _ := cpuid(1) 703 return int((b >> 16) & 0xff) 704 default: 705 return 0 706 } 707} 708 709func familyModel() (int, int) { 710 if maxFunctionID() < 0x1 { 711 return 0, 0 712 } 713 eax, _, _, _ := cpuid(1) 714 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 715 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 716 return int(family), int(model) 717} 718 719func physicalCores() int { 720 switch vendorID() { 721 case intel: 722 return logicalCores() / threadsPerCore() 723 case amd, hygon: 724 if maxExtendedFunction() >= 0x80000008 { 725 _, _, c, _ := cpuid(0x80000008) 726 return int(c&0xff) + 1 727 } 728 } 729 return 0 730} 731 732// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 733var vendorMapping = map[string]vendor{ 734 "AMDisbetter!": amd, 735 "AuthenticAMD": amd, 736 "CentaurHauls": via, 737 "GenuineIntel": intel, 738 "TransmetaCPU": transmeta, 739 "GenuineTMx86": transmeta, 740 "Geode by NSC": nsc, 741 "VIA VIA VIA ": via, 742 "KVMKVMKVMKVM": kvm, 743 "Microsoft Hv": msvm, 744 "VMwareVMware": vmware, 745 "XenVMMXenVMM": xenhvm, 746 "bhyve bhyve ": bhyve, 747 "HygonGenuine": hygon, 748} 749 750func vendorID() vendor { 751 _, b, c, d := cpuid(0) 752 v := valAsString(b, d, c) 753 vend, ok := vendorMapping[string(v)] 754 if !ok { 755 return other 756 } 757 return vend 758} 759 760func cacheLine() int { 761 if maxFunctionID() < 0x1 { 762 return 0 763 } 764 765 _, ebx, _, _ := cpuid(1) 766 cache := (ebx & 0xff00) >> 5 // cflush size 767 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 768 _, _, ecx, _ := cpuid(0x80000006) 769 cache = ecx & 0xff // cacheline size 770 } 771 // TODO: Read from Cache and TLB Information 772 return int(cache) 773} 774 775func (c *cpuInfo) cacheSize() { 776 c.cache.l1d = -1 777 c.cache.l1i = -1 778 c.cache.l2 = -1 779 
c.cache.l3 = -1 780 vendor := vendorID() 781 switch vendor { 782 case intel: 783 if maxFunctionID() < 4 { 784 return 785 } 786 for i := uint32(0); ; i++ { 787 eax, ebx, ecx, _ := cpuidex(4, i) 788 cacheType := eax & 15 789 if cacheType == 0 { 790 break 791 } 792 cacheLevel := (eax >> 5) & 7 793 coherency := int(ebx&0xfff) + 1 794 partitions := int((ebx>>12)&0x3ff) + 1 795 associativity := int((ebx>>22)&0x3ff) + 1 796 sets := int(ecx) + 1 797 size := associativity * partitions * coherency * sets 798 switch cacheLevel { 799 case 1: 800 if cacheType == 1 { 801 // 1 = Data Cache 802 c.cache.l1d = size 803 } else if cacheType == 2 { 804 // 2 = Instruction Cache 805 c.cache.l1i = size 806 } else { 807 if c.cache.l1d < 0 { 808 c.cache.l1i = size 809 } 810 if c.cache.l1i < 0 { 811 c.cache.l1i = size 812 } 813 } 814 case 2: 815 c.cache.l2 = size 816 case 3: 817 c.cache.l3 = size 818 } 819 } 820 case amd, hygon: 821 // Untested. 822 if maxExtendedFunction() < 0x80000005 { 823 return 824 } 825 _, _, ecx, edx := cpuid(0x80000005) 826 c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024) 827 c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024) 828 829 if maxExtendedFunction() < 0x80000006 { 830 return 831 } 832 _, _, ecx, _ = cpuid(0x80000006) 833 c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024) 834 } 835 836 return 837} 838 839type sgxepcsection struct { 840 baseaddress uint64 841 epcsize uint64 842} 843 844type sgxsupport struct { 845 available bool 846 launchcontrol bool 847 sgx1supported bool 848 sgx2supported bool 849 maxenclavesizenot64 int64 850 maxenclavesize64 int64 851 epcsections []sgxepcsection 852} 853 854func hasSGX(available, lc bool) (rval sgxsupport) { 855 rval.available = available 856 857 if !available { 858 return 859 } 860 861 rval.launchcontrol = lc 862 863 a, _, _, d := cpuidex(0x12, 0) 864 rval.sgx1supported = a&0x01 != 0 865 rval.sgx2supported = a&0x02 != 0 866 rval.maxenclavesizenot64 = 1 << (d & 0xFF) // pow 2 867 rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // 
pow 2 868 rval.epcsections = make([]sgxepcsection, 0) 869 870 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 871 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 872 leafType := eax & 0xf 873 874 if leafType == 0 { 875 // Invalid subleaf, stop iterating 876 break 877 } else if leafType == 1 { 878 // EPC Section subleaf 879 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 880 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 881 882 section := sgxepcsection{baseaddress: baseAddress, epcsize: size} 883 rval.epcsections = append(rval.epcsections, section) 884 } 885 } 886 887 return 888} 889 890func support() flags { 891 mfi := maxFunctionID() 892 vend := vendorID() 893 if mfi < 0x1 { 894 return 0 895 } 896 rval := uint64(0) 897 _, _, c, d := cpuid(1) 898 if (d & (1 << 15)) != 0 { 899 rval |= cmov 900 } 901 if (d & (1 << 23)) != 0 { 902 rval |= mmx 903 } 904 if (d & (1 << 25)) != 0 { 905 rval |= mmxext 906 } 907 if (d & (1 << 25)) != 0 { 908 rval |= sse 909 } 910 if (d & (1 << 26)) != 0 { 911 rval |= sse2 912 } 913 if (c & 1) != 0 { 914 rval |= sse3 915 } 916 if (c & (1 << 5)) != 0 { 917 rval |= vmx 918 } 919 if (c & 0x00000200) != 0 { 920 rval |= ssse3 921 } 922 if (c & 0x00080000) != 0 { 923 rval |= sse4 924 } 925 if (c & 0x00100000) != 0 { 926 rval |= sse42 927 } 928 if (c & (1 << 25)) != 0 { 929 rval |= aesni 930 } 931 if (c & (1 << 1)) != 0 { 932 rval |= clmul 933 } 934 if c&(1<<23) != 0 { 935 rval |= popcnt 936 } 937 if c&(1<<30) != 0 { 938 rval |= rdrand 939 } 940 if c&(1<<29) != 0 { 941 rval |= f16c 942 } 943 if c&(1<<13) != 0 { 944 rval |= cx16 945 } 946 if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 { 947 if threadsPerCore() > 1 { 948 rval |= htt 949 } 950 } 951 952 // Check XGETBV, OXSAVE and AVX bits 953 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 954 // Check for OS support 955 eax, _ := xgetbv(0) 956 if (eax & 0x6) == 0x6 { 957 rval |= avx 958 if (c & 0x00001000) != 0 { 959 rval |= fma3 960 } 961 } 962 } 
963 964 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 965 if mfi >= 7 { 966 _, ebx, ecx, edx := cpuidex(7, 0) 967 eax1, _, _, _ := cpuidex(7, 1) 968 if (rval&avx) != 0 && (ebx&0x00000020) != 0 { 969 rval |= avx2 970 } 971 if (ebx & 0x00000008) != 0 { 972 rval |= bmi1 973 if (ebx & 0x00000100) != 0 { 974 rval |= bmi2 975 } 976 } 977 if ebx&(1<<2) != 0 { 978 rval |= sgx 979 } 980 if ebx&(1<<4) != 0 { 981 rval |= hle 982 } 983 if ebx&(1<<9) != 0 { 984 rval |= erms 985 } 986 if ebx&(1<<11) != 0 { 987 rval |= rtm 988 } 989 if ebx&(1<<14) != 0 { 990 rval |= mpx 991 } 992 if ebx&(1<<18) != 0 { 993 rval |= rdseed 994 } 995 if ebx&(1<<19) != 0 { 996 rval |= adx 997 } 998 if ebx&(1<<29) != 0 { 999 rval |= sha 1000 } 1001 if edx&(1<<26) != 0 { 1002 rval |= ibpb 1003 } 1004 if ecx&(1<<30) != 0 { 1005 rval |= sgxlc 1006 } 1007 if edx&(1<<27) != 0 { 1008 rval |= stibp 1009 } 1010 1011 // Only detect AVX-512 features if XGETBV is supported 1012 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 1013 // Check for OS support 1014 eax, _ := xgetbv(0) 1015 1016 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 1017 // ZMM16-ZMM31 state are enabled by OS) 1018 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
1019 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 1020 if ebx&(1<<16) != 0 { 1021 rval |= avx512f 1022 } 1023 if ebx&(1<<17) != 0 { 1024 rval |= avx512dq 1025 } 1026 if ebx&(1<<21) != 0 { 1027 rval |= avx512ifma 1028 } 1029 if ebx&(1<<26) != 0 { 1030 rval |= avx512pf 1031 } 1032 if ebx&(1<<27) != 0 { 1033 rval |= avx512er 1034 } 1035 if ebx&(1<<28) != 0 { 1036 rval |= avx512cd 1037 } 1038 if ebx&(1<<30) != 0 { 1039 rval |= avx512bw 1040 } 1041 if ebx&(1<<31) != 0 { 1042 rval |= avx512vl 1043 } 1044 // ecx 1045 if ecx&(1<<1) != 0 { 1046 rval |= avx512vbmi 1047 } 1048 if ecx&(1<<6) != 0 { 1049 rval |= avx512vbmi2 1050 } 1051 if ecx&(1<<8) != 0 { 1052 rval |= gfni 1053 } 1054 if ecx&(1<<9) != 0 { 1055 rval |= vaes 1056 } 1057 if ecx&(1<<10) != 0 { 1058 rval |= vpclmulqdq 1059 } 1060 if ecx&(1<<11) != 0 { 1061 rval |= avx512vnni 1062 } 1063 if ecx&(1<<12) != 0 { 1064 rval |= avx512bitalg 1065 } 1066 if ecx&(1<<14) != 0 { 1067 rval |= avx512vpopcntdq 1068 } 1069 // edx 1070 if edx&(1<<8) != 0 { 1071 rval |= avx512vp2intersect 1072 } 1073 // cpuid eax 07h,ecx=1 1074 if eax1&(1<<5) != 0 { 1075 rval |= avx512bf16 1076 } 1077 } 1078 } 1079 } 1080 1081 if maxExtendedFunction() >= 0x80000001 { 1082 _, _, c, d := cpuid(0x80000001) 1083 if (c & (1 << 5)) != 0 { 1084 rval |= lzcnt 1085 rval |= popcnt 1086 } 1087 if (d & (1 << 31)) != 0 { 1088 rval |= amd3dnow 1089 } 1090 if (d & (1 << 30)) != 0 { 1091 rval |= amd3dnowext 1092 } 1093 if (d & (1 << 23)) != 0 { 1094 rval |= mmx 1095 } 1096 if (d & (1 << 22)) != 0 { 1097 rval |= mmxext 1098 } 1099 if (c & (1 << 6)) != 0 { 1100 rval |= sse4a 1101 } 1102 if d&(1<<20) != 0 { 1103 rval |= nx 1104 } 1105 if d&(1<<27) != 0 { 1106 rval |= rdtscp 1107 } 1108 1109 /* Allow for selectively disabling SSE2 functions on AMD processors 1110 with SSE2 support but not SSE4a. This includes Athlon64, some 1111 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 1112 than SSE2 often enough to utilize this special-case flag. 
1113 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 1114 so that SSE2 is used unless explicitly disabled by checking 1115 AV_CPU_FLAG_SSE2SLOW. */ 1116 if vendorID() != intel && 1117 rval&sse2 != 0 && (c&0x00000040) == 0 { 1118 rval |= sse2slow 1119 } 1120 1121 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 1122 * used unless the OS has AVX support. */ 1123 if (rval & avx) != 0 { 1124 if (c & 0x00000800) != 0 { 1125 rval |= xop 1126 } 1127 if (c & 0x00010000) != 0 { 1128 rval |= fma4 1129 } 1130 } 1131 1132 if vendorID() == intel { 1133 family, model := familyModel() 1134 if family == 6 && (model == 9 || model == 13 || model == 14) { 1135 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 1136 * 6/14 (core1 "yonah") theoretically support sse2, but it's 1137 * usually slower than mmx. */ 1138 if (rval & sse2) != 0 { 1139 rval |= sse2slow 1140 } 1141 if (rval & sse3) != 0 { 1142 rval |= sse3slow 1143 } 1144 } 1145 /* The Atom processor has SSSE3 support, which is useful in many cases, 1146 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 1147 * on the Atom, but is generally faster on other processors supporting 1148 * SSSE3. This flag allows for selectively disabling certain SSSE3 1149 * functions on the Atom. */ 1150 if family == 6 && model == 28 { 1151 rval |= atom 1152 } 1153 } 1154 } 1155 return flags(rval) 1156} 1157 1158func valAsString(values ...uint32) []byte { 1159 r := make([]byte, 4*len(values)) 1160 for i, v := range values { 1161 dst := r[i*4:] 1162 dst[0] = byte(v & 0xff) 1163 dst[1] = byte((v >> 8) & 0xff) 1164 dst[2] = byte((v >> 16) & 0xff) 1165 dst[3] = byte((v >> 24) & 0xff) 1166 switch { 1167 case dst[0] == 0: 1168 return r[:i*4] 1169 case dst[1] == 0: 1170 return r[:i*4+1] 1171 case dst[2] == 0: 1172 return r[:i*4+2] 1173 case dst[3] == 0: 1174 return r[:i*4+3] 1175 } 1176 } 1177 return r 1178} 1179