1// Generated, DO NOT EDIT, 2// but copy it to your own project and rename the package. 3// See more at http://github.com/klauspost/cpuid 4 5package cpuid 6 7import "strings" 8 9// Vendor is a representation of a CPU vendor. 10type vendor int 11 12const ( 13 other vendor = iota 14 intel 15 amd 16 via 17 transmeta 18 nsc 19 kvm // Kernel-based Virtual Machine 20 msvm // Microsoft Hyper-V or Windows Virtual PC 21 vmware 22 xenhvm 23) 24 25const ( 26 cmov = 1 << iota // i686 CMOV 27 nx // NX (No-Execute) bit 28 amd3dnow // AMD 3DNOW 29 amd3dnowext // AMD 3DNowExt 30 mmx // standard MMX 31 mmxext // SSE integer functions or AMD MMX ext 32 sse // SSE functions 33 sse2 // P4 SSE functions 34 sse3 // Prescott SSE3 functions 35 ssse3 // Conroe SSSE3 functions 36 sse4 // Penryn SSE4.1 functions 37 sse4a // AMD Barcelona microarchitecture SSE4a instructions 38 sse42 // Nehalem SSE4.2 functions 39 avx // AVX functions 40 avx2 // AVX2 functions 41 fma3 // Intel FMA 3 42 fma4 // Bulldozer FMA4 functions 43 xop // Bulldozer XOP functions 44 f16c // Half-precision floating-point conversion 45 bmi1 // Bit Manipulation Instruction Set 1 46 bmi2 // Bit Manipulation Instruction Set 2 47 tbm // AMD Trailing Bit Manipulation 48 lzcnt // LZCNT instruction 49 popcnt // POPCNT instruction 50 aesni // Advanced Encryption Standard New Instructions 51 clmul // Carry-less Multiplication 52 htt // Hyperthreading (enabled) 53 hle // Hardware Lock Elision 54 rtm // Restricted Transactional Memory 55 rdrand // RDRAND instruction is available 56 rdseed // RDSEED instruction is available 57 adx // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 58 sha // Intel SHA Extensions 59 avx512f // AVX-512 Foundation 60 avx512dq // AVX-512 Doubleword and Quadword Instructions 61 avx512ifma // AVX-512 Integer Fused Multiply-Add Instructions 62 avx512pf // AVX-512 Prefetch Instructions 63 avx512er // AVX-512 Exponential and Reciprocal Instructions 64 avx512cd // AVX-512 Conflict Detection Instructions 65 avx512bw // AVX-512 Byte and Word Instructions 66 avx512vl // AVX-512 Vector Length Extensions 67 avx512vbmi // AVX-512 Vector Bit Manipulation Instructions 68 mpx // Intel MPX (Memory Protection Extensions) 69 erms // Enhanced REP MOVSB/STOSB 70 rdtscp // RDTSCP Instruction 71 cx16 // CMPXCHG16B Instruction 72 sgx // Software Guard Extensions 73 ibpb // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) 74 stibp // Single Thread Indirect Branch Predictors 75 76 // Performance indicators 77 sse2slow // SSE2 is supported, but usually not faster 78 sse3slow // SSE3 is supported, but usually not faster 79 atom // Atom processor, some SSSE3 instructions are slower 80) 81 82var flagNames = map[flags]string{ 83 cmov: "CMOV", // i686 CMOV 84 nx: "NX", // NX (No-Execute) bit 85 amd3dnow: "AMD3DNOW", // AMD 3DNOW 86 amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt 87 mmx: "MMX", // Standard MMX 88 mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext 89 sse: "SSE", // SSE functions 90 sse2: "SSE2", // P4 SSE2 functions 91 sse3: "SSE3", // Prescott SSE3 functions 92 ssse3: "SSSE3", // Conroe SSSE3 functions 93 sse4: "SSE4.1", // Penryn SSE4.1 functions 94 sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions 95 sse42: "SSE4.2", // Nehalem SSE4.2 functions 96 avx: "AVX", // AVX functions 97 avx2: "AVX2", // AVX functions 98 fma3: "FMA3", // Intel FMA 3 99 fma4: "FMA4", // Bulldozer FMA4 functions 100 xop: "XOP", // Bulldozer XOP functions 101 f16c: "F16C", // Half-precision floating-point conversion 102 bmi1: "BMI1", // Bit Manipulation Instruction Set 1 103 bmi2: "BMI2", // Bit Manipulation Instruction Set 2 104 tbm: "TBM", // AMD Trailing Bit Manipulation 105 lzcnt: "LZCNT", // LZCNT instruction 106 popcnt: "POPCNT", // POPCNT instruction 107 aesni: "AESNI", // Advanced Encryption Standard New Instructions 108 clmul: "CLMUL", // Carry-less Multiplication 109 htt: "HTT", // Hyperthreading (enabled) 110 hle: "HLE", // Hardware Lock Elision 111 rtm: "RTM", // Restricted Transactional Memory 112 rdrand: "RDRAND", // RDRAND instruction is available 113 rdseed: "RDSEED", // RDSEED instruction is available 114 adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 115 sha: "SHA", // Intel SHA Extensions 116 avx512f: "AVX512F", // AVX-512 Foundation 117 avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions 118 avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions 119 avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions 120 avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions 121 avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions 122 avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions 123 avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions 124 avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions 125 mpx: "MPX", // Intel MPX (Memory Protection Extensions) 126 erms: "ERMS", // Enhanced REP MOVSB/STOSB 127 rdtscp: "RDTSCP", // RDTSCP Instruction 128 cx16: "CX16", // CMPXCHG16B Instruction 129 sgx: "SGX", // Software Guard Extensions 130 ibpb: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier 131 stibp: "STIBP", // Single Thread Indirect Branch Predictors 132 133 // Performance indicators 134 sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster 135 sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster 136 atom: "ATOM", // Atom processor, some SSSE3 instructions are slower 137 138} 139 140// CPUInfo contains information about the detected system CPU. 141type cpuInfo struct { 142 brandname string // Brand name reported by the CPU 143 vendorid vendor // Comparable CPU vendor ID 144 features flags // Features of the CPU 145 physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. 146 threadspercore int // Number of threads per physical core. Will be 1 if undetectable. 147 logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. 148 family int // CPU family number 149 model int // CPU model number 150 cacheline int // Cache line size in bytes. Will be 0 if undetectable. 151 cache struct { 152 l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected 153 l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected 154 l2 int // L2 Cache (per core or shared). Will be -1 if undetected 155 l3 int // L3 Instruction Cache (per core or shared). Will be -1 if undetected 156 } 157 sgx sgxsupport 158 maxFunc uint32 159 maxExFunc uint32 160} 161 162var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) 163var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) 164var xgetbv func(index uint32) (eax, edx uint32) 165var rdtscpAsm func() (eax, ebx, ecx, edx uint32) 166 167// CPU contains information about the CPU as detected on startup, 168// or when Detect last was called. 169// 170// Use this as the primary entry point to you data, 171// this way queries are 172var cpu cpuInfo 173 174func init() { 175 initCPU() 176 detect() 177} 178 179// Detect will re-detect current CPU info. 180// This will replace the content of the exported CPU variable. 181// 182// Unless you expect the CPU to change while you are running your program 183// you should not need to call this function. 184// If you call this, you must ensure that no other goroutine is accessing the 185// exported CPU variable. 186func detect() { 187 cpu.maxFunc = maxFunctionID() 188 cpu.maxExFunc = maxExtendedFunction() 189 cpu.brandname = brandName() 190 cpu.cacheline = cacheLine() 191 cpu.family, cpu.model = familyModel() 192 cpu.features = support() 193 cpu.sgx = hasSGX(cpu.features&sgx != 0) 194 cpu.threadspercore = threadsPerCore() 195 cpu.logicalcores = logicalCores() 196 cpu.physicalcores = physicalCores() 197 cpu.vendorid = vendorID() 198 cpu.cacheSize() 199} 200 201// Generated here: http://play.golang.org/p/BxFH2Gdc0G 202 203// Cmov indicates support of CMOV instructions 204func (c cpuInfo) cmov() bool { 205 return c.features&cmov != 0 206} 207 208// Amd3dnow indicates support of AMD 3DNOW! instructions 209func (c cpuInfo) amd3dnow() bool { 210 return c.features&amd3dnow != 0 211} 212 213// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions 214func (c cpuInfo) amd3dnowext() bool { 215 return c.features&amd3dnowext != 0 216} 217 218// MMX indicates support of MMX instructions 219func (c cpuInfo) mmx() bool { 220 return c.features&mmx != 0 221} 222 223// MMXExt indicates support of MMXEXT instructions 224// (SSE integer functions or AMD MMX ext) 225func (c cpuInfo) mmxext() bool { 226 return c.features&mmxext != 0 227} 228 229// SSE indicates support of SSE instructions 230func (c cpuInfo) sse() bool { 231 return c.features&sse != 0 232} 233 234// SSE2 indicates support of SSE 2 instructions 235func (c cpuInfo) sse2() bool { 236 return c.features&sse2 != 0 237} 238 239// SSE3 indicates support of SSE 3 instructions 240func (c cpuInfo) sse3() bool { 241 return c.features&sse3 != 0 242} 243 244// SSSE3 indicates support of SSSE 3 instructions 245func (c cpuInfo) ssse3() bool { 246 return c.features&ssse3 != 0 247} 248 249// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 250func (c cpuInfo) sse4() bool { 251 return c.features&sse4 != 0 252} 253 254// SSE42 indicates support of SSE4.2 instructions 255func (c cpuInfo) sse42() bool { 256 return c.features&sse42 != 0 257} 258 259// AVX indicates support of AVX instructions 260// and operating system support of AVX instructions 261func (c cpuInfo) avx() bool { 262 return c.features&avx != 0 263} 264 265// AVX2 indicates support of AVX2 instructions 266func (c cpuInfo) avx2() bool { 267 return c.features&avx2 != 0 268} 269 270// FMA3 indicates support of FMA3 instructions 271func (c cpuInfo) fma3() bool { 272 return c.features&fma3 != 0 273} 274 275// FMA4 indicates support of FMA4 instructions 276func (c cpuInfo) fma4() bool { 277 return c.features&fma4 != 0 278} 279 280// XOP indicates support of XOP instructions 281func (c cpuInfo) xop() bool { 282 return c.features&xop != 0 283} 284 285// F16C indicates support of F16C instructions 286func (c cpuInfo) f16c() bool { 287 return c.features&f16c != 0 288} 289 290// BMI1 indicates support of BMI1 instructions 291func (c cpuInfo) bmi1() bool { 292 return c.features&bmi1 != 0 293} 294 295// BMI2 indicates support of BMI2 instructions 296func (c cpuInfo) bmi2() bool { 297 return c.features&bmi2 != 0 298} 299 300// TBM indicates support of TBM instructions 301// (AMD Trailing Bit Manipulation) 302func (c cpuInfo) tbm() bool { 303 return c.features&tbm != 0 304} 305 306// Lzcnt indicates support of LZCNT instruction 307func (c cpuInfo) lzcnt() bool { 308 return c.features&lzcnt != 0 309} 310 311// Popcnt indicates support of POPCNT instruction 312func (c cpuInfo) popcnt() bool { 313 return c.features&popcnt != 0 314} 315 316// HTT indicates the processor has Hyperthreading enabled 317func (c cpuInfo) htt() bool { 318 return c.features&htt != 0 319} 320 321// SSE2Slow indicates that SSE2 may be slow on this processor 322func (c cpuInfo) sse2slow() bool { 323 return c.features&sse2slow != 0 324} 325 326// SSE3Slow indicates that SSE3 may be slow on this processor 327func (c cpuInfo) sse3slow() bool { 328 return c.features&sse3slow != 0 329} 330 331// AesNi indicates support of AES-NI instructions 332// (Advanced Encryption Standard New Instructions) 333func (c cpuInfo) aesni() bool { 334 return c.features&aesni != 0 335} 336 337// Clmul indicates support of CLMUL instructions 338// (Carry-less Multiplication) 339func (c cpuInfo) clmul() bool { 340 return c.features&clmul != 0 341} 342 343// NX indicates support of NX (No-Execute) bit 344func (c cpuInfo) nx() bool { 345 return c.features&nx != 0 346} 347 348// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 349func (c cpuInfo) sse4a() bool { 350 return c.features&sse4a != 0 351} 352 353// HLE indicates support of Hardware Lock Elision 354func (c cpuInfo) hle() bool { 355 return c.features&hle != 0 356} 357 358// RTM indicates support of Restricted Transactional Memory 359func (c cpuInfo) rtm() bool { 360 return c.features&rtm != 0 361} 362 363// Rdrand indicates support of RDRAND instruction is available 364func (c cpuInfo) rdrand() bool { 365 return c.features&rdrand != 0 366} 367 368// Rdseed indicates support of RDSEED instruction is available 369func (c cpuInfo) rdseed() bool { 370 return c.features&rdseed != 0 371} 372 373// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 374func (c cpuInfo) adx() bool { 375 return c.features&adx != 0 376} 377 378// SHA indicates support of Intel SHA Extensions 379func (c cpuInfo) sha() bool { 380 return c.features&sha != 0 381} 382 383// AVX512F indicates support of AVX-512 Foundation 384func (c cpuInfo) avx512f() bool { 385 return c.features&avx512f != 0 386} 387 388// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 389func (c cpuInfo) avx512dq() bool { 390 return c.features&avx512dq != 0 391} 392 393// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 394func (c cpuInfo) avx512ifma() bool { 395 return c.features&avx512ifma != 0 396} 397 398// AVX512PF indicates support of AVX-512 Prefetch Instructions 399func (c cpuInfo) avx512pf() bool { 400 return c.features&avx512pf != 0 401} 402 403// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 404func (c cpuInfo) avx512er() bool { 405 return c.features&avx512er != 0 406} 407 408// AVX512CD indicates support of AVX-512 Conflict Detection Instructions 409func (c cpuInfo) avx512cd() bool { 410 return c.features&avx512cd != 0 411} 412 413// AVX512BW indicates support of AVX-512 Byte and Word Instructions 414func (c cpuInfo) avx512bw() bool { 415 return c.features&avx512bw != 0 416} 417 418// AVX512VL indicates support of AVX-512 Vector Length Extensions 419func (c cpuInfo) avx512vl() bool { 420 return c.features&avx512vl != 0 421} 422 423// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions 424func (c cpuInfo) avx512vbmi() bool { 425 return c.features&avx512vbmi != 0 426} 427 428// MPX indicates support of Intel MPX (Memory Protection Extensions) 429func (c cpuInfo) mpx() bool { 430 return c.features&mpx != 0 431} 432 433// ERMS indicates support of Enhanced REP MOVSB/STOSB 434func (c cpuInfo) erms() bool { 435 return c.features&erms != 0 436} 437 438// RDTSCP Instruction is available. 439func (c cpuInfo) rdtscp() bool { 440 return c.features&rdtscp != 0 441} 442 443// CX16 indicates if CMPXCHG16B instruction is available. 444func (c cpuInfo) cx16() bool { 445 return c.features&cx16 != 0 446} 447 448// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. 449// So TSX simply checks that. 450func (c cpuInfo) tsx() bool { 451 return c.features&(mpx|rtm) == mpx|rtm 452} 453 454// Atom indicates an Atom processor 455func (c cpuInfo) atom() bool { 456 return c.features&atom != 0 457} 458 459// Intel returns true if vendor is recognized as Intel 460func (c cpuInfo) intel() bool { 461 return c.vendorid == intel 462} 463 464// AMD returns true if vendor is recognized as AMD 465func (c cpuInfo) amd() bool { 466 return c.vendorid == amd 467} 468 469// Transmeta returns true if vendor is recognized as Transmeta 470func (c cpuInfo) transmeta() bool { 471 return c.vendorid == transmeta 472} 473 474// NSC returns true if vendor is recognized as National Semiconductor 475func (c cpuInfo) nsc() bool { 476 return c.vendorid == nsc 477} 478 479// VIA returns true if vendor is recognized as VIA 480func (c cpuInfo) via() bool { 481 return c.vendorid == via 482} 483 484// RTCounter returns the 64-bit time-stamp counter 485// Uses the RDTSCP instruction. The value 0 is returned 486// if the CPU does not support the instruction. 487func (c cpuInfo) rtcounter() uint64 { 488 if !c.rdtscp() { 489 return 0 490 } 491 a, _, _, d := rdtscpAsm() 492 return uint64(a) | (uint64(d) << 32) 493} 494 495// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 496// This variable is OS dependent, but on Linux contains information 497// about the current cpu/core the code is running on. 498// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 499func (c cpuInfo) ia32tscaux() uint32 { 500 if !c.rdtscp() { 501 return 0 502 } 503 _, _, ecx, _ := rdtscpAsm() 504 return ecx 505} 506 507// LogicalCPU will return the Logical CPU the code is currently executing on. 508// This is likely to change when the OS re-schedules the running thread 509// to another CPU. 510// If the current core cannot be detected, -1 will be returned. 511func (c cpuInfo) logicalcpu() int { 512 if c.maxFunc < 1 { 513 return -1 514 } 515 _, ebx, _, _ := cpuid(1) 516 return int(ebx >> 24) 517} 518 519// VM Will return true if the cpu id indicates we are in 520// a virtual machine. This is only a hint, and will very likely 521// have many false negatives. 522func (c cpuInfo) vm() bool { 523 switch c.vendorid { 524 case msvm, kvm, vmware, xenhvm: 525 return true 526 } 527 return false 528} 529 530// Flags contains detected cpu features and caracteristics 531type flags uint64 532 533// String returns a string representation of the detected 534// CPU features. 535func (f flags) String() string { 536 return strings.Join(f.strings(), ",") 537} 538 539// Strings returns and array of the detected features. 540func (f flags) strings() []string { 541 s := support() 542 r := make([]string, 0, 20) 543 for i := uint(0); i < 64; i++ { 544 key := flags(1 << i) 545 val := flagNames[key] 546 if s&key != 0 { 547 r = append(r, val) 548 } 549 } 550 return r 551} 552 553func maxExtendedFunction() uint32 { 554 eax, _, _, _ := cpuid(0x80000000) 555 return eax 556} 557 558func maxFunctionID() uint32 { 559 a, _, _, _ := cpuid(0) 560 return a 561} 562 563func brandName() string { 564 if maxExtendedFunction() >= 0x80000004 { 565 v := make([]uint32, 0, 48) 566 for i := uint32(0); i < 3; i++ { 567 a, b, c, d := cpuid(0x80000002 + i) 568 v = append(v, a, b, c, d) 569 } 570 return strings.Trim(string(valAsString(v...)), " ") 571 } 572 return "unknown" 573} 574 575func threadsPerCore() int { 576 mfi := maxFunctionID() 577 if mfi < 0x4 || vendorID() != intel { 578 return 1 579 } 580 581 if mfi < 0xb { 582 _, b, _, d := cpuid(1) 583 if (d & (1 << 28)) != 0 { 584 // v will contain logical core count 585 v := (b >> 16) & 255 586 if v > 1 { 587 a4, _, _, _ := cpuid(4) 588 // physical cores 589 v2 := (a4 >> 26) + 1 590 if v2 > 0 { 591 return int(v) / int(v2) 592 } 593 } 594 } 595 return 1 596 } 597 _, b, _, _ := cpuidex(0xb, 0) 598 if b&0xffff == 0 { 599 return 1 600 } 601 return int(b & 0xffff) 602} 603 604func logicalCores() int { 605 mfi := maxFunctionID() 606 switch vendorID() { 607 case intel: 608 // Use this on old Intel processors 609 if mfi < 0xb { 610 if mfi < 1 { 611 return 0 612 } 613 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 614 // that can be assigned to logical processors in a physical package. 615 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 616 _, ebx, _, _ := cpuid(1) 617 logical := (ebx >> 16) & 0xff 618 return int(logical) 619 } 620 _, b, _, _ := cpuidex(0xb, 1) 621 return int(b & 0xffff) 622 case amd: 623 _, b, _, _ := cpuid(1) 624 return int((b >> 16) & 0xff) 625 default: 626 return 0 627 } 628} 629 630func familyModel() (int, int) { 631 if maxFunctionID() < 0x1 { 632 return 0, 0 633 } 634 eax, _, _, _ := cpuid(1) 635 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 636 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 637 return int(family), int(model) 638} 639 640func physicalCores() int { 641 switch vendorID() { 642 case intel: 643 return logicalCores() / threadsPerCore() 644 case amd: 645 if maxExtendedFunction() >= 0x80000008 { 646 _, _, c, _ := cpuid(0x80000008) 647 return int(c&0xff) + 1 648 } 649 } 650 return 0 651} 652 653// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 654var vendorMapping = map[string]vendor{ 655 "AMDisbetter!": amd, 656 "AuthenticAMD": amd, 657 "CentaurHauls": via, 658 "GenuineIntel": intel, 659 "TransmetaCPU": transmeta, 660 "GenuineTMx86": transmeta, 661 "Geode by NSC": nsc, 662 "VIA VIA VIA ": via, 663 "KVMKVMKVMKVM": kvm, 664 "Microsoft Hv": msvm, 665 "VMwareVMware": vmware, 666 "XenVMMXenVMM": xenhvm, 667} 668 669func vendorID() vendor { 670 _, b, c, d := cpuid(0) 671 v := valAsString(b, d, c) 672 vend, ok := vendorMapping[string(v)] 673 if !ok { 674 return other 675 } 676 return vend 677} 678 679func cacheLine() int { 680 if maxFunctionID() < 0x1 { 681 return 0 682 } 683 684 _, ebx, _, _ := cpuid(1) 685 cache := (ebx & 0xff00) >> 5 // cflush size 686 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 687 _, _, ecx, _ := cpuid(0x80000006) 688 cache = ecx & 0xff // cacheline size 689 } 690 // TODO: Read from Cache and TLB Information 691 return int(cache) 692} 693 694func (c *cpuInfo) cacheSize() { 695 c.cache.l1d = -1 696 c.cache.l1i = -1 697 c.cache.l2 = -1 698 c.cache.l3 = -1 699 vendor := vendorID() 700 switch vendor { 701 case intel: 702 if maxFunctionID() < 4 { 703 return 704 } 705 for i := uint32(0); ; i++ { 706 eax, ebx, ecx, _ := cpuidex(4, i) 707 cacheType := eax & 15 708 if cacheType == 0 { 709 break 710 } 711 cacheLevel := (eax >> 5) & 7 712 coherency := int(ebx&0xfff) + 1 713 partitions := int((ebx>>12)&0x3ff) + 1 714 associativity := int((ebx>>22)&0x3ff) + 1 715 sets := int(ecx) + 1 716 size := associativity * partitions * coherency * sets 717 switch cacheLevel { 718 case 1: 719 if cacheType == 1 { 720 // 1 = Data Cache 721 c.cache.l1d = size 722 } else if cacheType == 2 { 723 // 2 = Instruction Cache 724 c.cache.l1i = size 725 } else { 726 if c.cache.l1d < 0 { 727 c.cache.l1i = size 728 } 729 if c.cache.l1i < 0 { 730 c.cache.l1i = size 731 } 732 } 733 case 2: 734 c.cache.l2 = size 735 case 3: 736 c.cache.l3 = size 737 } 738 } 739 case amd: 740 // Untested. 741 if maxExtendedFunction() < 0x80000005 { 742 return 743 } 744 _, _, ecx, edx := cpuid(0x80000005) 745 c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024) 746 c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024) 747 748 if maxExtendedFunction() < 0x80000006 { 749 return 750 } 751 _, _, ecx, _ = cpuid(0x80000006) 752 c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024) 753 } 754 755 return 756} 757 758type sgxsupport struct { 759 available bool 760 sgx1supported bool 761 sgx2supported bool 762 maxenclavesizenot64 int64 763 maxenclavesize64 int64 764} 765 766func hasSGX(available bool) (rval sgxsupport) { 767 rval.available = available 768 769 if !available { 770 return 771 } 772 773 a, _, _, d := cpuidex(0x12, 0) 774 rval.sgx1supported = a&0x01 != 0 775 rval.sgx2supported = a&0x02 != 0 776 rval.maxenclavesizenot64 = 1 << (d & 0xFF) // pow 2 777 rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // pow 2 778 779 return 780} 781 782func support() flags { 783 mfi := maxFunctionID() 784 vend := vendorID() 785 if mfi < 0x1 { 786 return 0 787 } 788 rval := uint64(0) 789 _, _, c, d := cpuid(1) 790 if (d & (1 << 15)) != 0 { 791 rval |= cmov 792 } 793 if (d & (1 << 23)) != 0 { 794 rval |= mmx 795 } 796 if (d & (1 << 25)) != 0 { 797 rval |= mmxext 798 } 799 if (d & (1 << 25)) != 0 { 800 rval |= sse 801 } 802 if (d & (1 << 26)) != 0 { 803 rval |= sse2 804 } 805 if (c & 1) != 0 { 806 rval |= sse3 807 } 808 if (c & 0x00000200) != 0 { 809 rval |= ssse3 810 } 811 if (c & 0x00080000) != 0 { 812 rval |= sse4 813 } 814 if (c & 0x00100000) != 0 { 815 rval |= sse42 816 } 817 if (c & (1 << 25)) != 0 { 818 rval |= aesni 819 } 820 if (c & (1 << 1)) != 0 { 821 rval |= clmul 822 } 823 if c&(1<<23) != 0 { 824 rval |= popcnt 825 } 826 if c&(1<<30) != 0 { 827 rval |= rdrand 828 } 829 if c&(1<<29) != 0 { 830 rval |= f16c 831 } 832 if c&(1<<13) != 0 { 833 rval |= cx16 834 } 835 if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 { 836 if threadsPerCore() > 1 { 837 rval |= htt 838 } 839 } 840 841 // Check XGETBV, OXSAVE and AVX bits 842 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 843 // Check for OS support 844 eax, _ := xgetbv(0) 845 if (eax & 0x6) == 0x6 { 846 rval |= avx 847 if (c & 0x00001000) != 0 { 848 rval |= fma3 849 } 850 } 851 } 852 853 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 854 if mfi >= 7 { 855 _, ebx, ecx, edx := cpuidex(7, 0) 856 if (rval&avx) != 0 && (ebx&0x00000020) != 0 { 857 rval |= avx2 858 } 859 if (ebx & 0x00000008) != 0 { 860 rval |= bmi1 861 if (ebx & 0x00000100) != 0 { 862 rval |= bmi2 863 } 864 } 865 if ebx&(1<<2) != 0 { 866 rval |= sgx 867 } 868 if ebx&(1<<4) != 0 { 869 rval |= hle 870 } 871 if ebx&(1<<9) != 0 { 872 rval |= erms 873 } 874 if ebx&(1<<11) != 0 { 875 rval |= rtm 876 } 877 if ebx&(1<<14) != 0 { 878 rval |= mpx 879 } 880 if ebx&(1<<18) != 0 { 881 rval |= rdseed 882 } 883 if ebx&(1<<19) != 0 { 884 rval |= adx 885 } 886 if ebx&(1<<29) != 0 { 887 rval |= sha 888 } 889 if edx&(1<<26) != 0 { 890 rval |= ibpb 891 } 892 if edx&(1<<27) != 0 { 893 rval |= stibp 894 } 895 896 // Only detect AVX-512 features if XGETBV is supported 897 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 898 // Check for OS support 899 eax, _ := xgetbv(0) 900 901 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 902 // ZMM16-ZMM31 state are enabled by OS) 903 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 904 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 905 if ebx&(1<<16) != 0 { 906 rval |= avx512f 907 } 908 if ebx&(1<<17) != 0 { 909 rval |= avx512dq 910 } 911 if ebx&(1<<21) != 0 { 912 rval |= avx512ifma 913 } 914 if ebx&(1<<26) != 0 { 915 rval |= avx512pf 916 } 917 if ebx&(1<<27) != 0 { 918 rval |= avx512er 919 } 920 if ebx&(1<<28) != 0 { 921 rval |= avx512cd 922 } 923 if ebx&(1<<30) != 0 { 924 rval |= avx512bw 925 } 926 if ebx&(1<<31) != 0 { 927 rval |= avx512vl 928 } 929 // ecx 930 if ecx&(1<<1) != 0 { 931 rval |= avx512vbmi 932 } 933 } 934 } 935 } 936 937 if maxExtendedFunction() >= 0x80000001 { 938 _, _, c, d := cpuid(0x80000001) 939 if (c & (1 << 5)) != 0 { 940 rval |= lzcnt 941 rval |= popcnt 942 } 943 if (d & (1 << 31)) != 0 { 944 rval |= amd3dnow 945 } 946 if (d & (1 << 30)) != 0 { 947 rval |= amd3dnowext 948 } 949 if (d & (1 << 23)) != 0 { 950 rval |= mmx 951 } 952 if (d & (1 << 22)) != 0 { 953 rval |= mmxext 954 } 955 if (c & (1 << 6)) != 0 { 956 rval |= sse4a 957 } 958 if d&(1<<20) != 0 { 959 rval |= nx 960 } 961 if d&(1<<27) != 0 { 962 rval |= rdtscp 963 } 964 965 /* Allow for selectively disabling SSE2 functions on AMD processors 966 with SSE2 support but not SSE4a. This includes Athlon64, some 967 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 968 than SSE2 often enough to utilize this special-case flag. 969 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 970 so that SSE2 is used unless explicitly disabled by checking 971 AV_CPU_FLAG_SSE2SLOW. */ 972 if vendorID() != intel && 973 rval&sse2 != 0 && (c&0x00000040) == 0 { 974 rval |= sse2slow 975 } 976 977 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 978 * used unless the OS has AVX support. */ 979 if (rval & avx) != 0 { 980 if (c & 0x00000800) != 0 { 981 rval |= xop 982 } 983 if (c & 0x00010000) != 0 { 984 rval |= fma4 985 } 986 } 987 988 if vendorID() == intel { 989 family, model := familyModel() 990 if family == 6 && (model == 9 || model == 13 || model == 14) { 991 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 992 * 6/14 (core1 "yonah") theoretically support sse2, but it's 993 * usually slower than mmx. */ 994 if (rval & sse2) != 0 { 995 rval |= sse2slow 996 } 997 if (rval & sse3) != 0 { 998 rval |= sse3slow 999 } 1000 } 1001 /* The Atom processor has SSSE3 support, which is useful in many cases, 1002 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 1003 * on the Atom, but is generally faster on other processors supporting 1004 * SSSE3. This flag allows for selectively disabling certain SSSE3 1005 * functions on the Atom. */ 1006 if family == 6 && model == 28 { 1007 rval |= atom 1008 } 1009 } 1010 } 1011 return flags(rval) 1012} 1013 1014func valAsString(values ...uint32) []byte { 1015 r := make([]byte, 4*len(values)) 1016 for i, v := range values { 1017 dst := r[i*4:] 1018 dst[0] = byte(v & 0xff) 1019 dst[1] = byte((v >> 8) & 0xff) 1020 dst[2] = byte((v >> 16) & 0xff) 1021 dst[3] = byte((v >> 24) & 0xff) 1022 switch { 1023 case dst[0] == 0: 1024 return r[:i*4] 1025 case dst[1] == 0: 1026 return r[:i*4+1] 1027 case dst[2] == 0: 1028 return r[:i*4+2] 1029 case dst[3] == 0: 1030 return r[:i*4+3] 1031 } 1032 } 1033 return r 1034} 1035