1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import "strings"
14
// Vendor is a representation of a CPU vendor.
// Values are produced by mapping the 12-byte CPUID vendor string through
// vendorMapping; hypervisors that report their own vendor string are
// represented as vendors too.
type Vendor int

// Known vendors. Other is the zero value and is used for every vendor string
// that is not present in vendorMapping.
const (
	Other Vendor = iota // Unrecognized vendor string
	Intel               // "GenuineIntel"
	AMD                 // "AuthenticAMD" or "AMDisbetter!"
	VIA                 // "CentaurHauls" or "VIA VIA VIA "
	Transmeta           // "TransmetaCPU" or "GenuineTMx86"
	NSC                 // National Semiconductor ("Geode by NSC")
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware              // "VMwareVMware"
	XenHVM              // "XenVMMXenVMM"
)
30
// CPU feature flags. Each constant is a distinct single bit (1 << iota), so
// flags can be OR-ed together and tested against a Flags value such as
// CPUInfo.Features. The constants are untyped; the order defines the bit
// positions and therefore the order used by Flags.Strings.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // Standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
85
// flagNames maps each single-bit feature flag to the name used for it in
// Flags.Strings and Flags.String. Note that SSE4 and SSE42 are printed as
// "SSE4.1" and "SSE4.2".
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
141
// CPUInfo contains information about the detected system CPU.
// All fields are filled in by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU, or "unknown" if it cannot be read
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // Software Guard Extensions details; only Available is set when the SGX flag is absent
	maxFunc   uint32     // Value of maxFunctionID() at detection time (highest standard CPUID function)
	maxExFunc uint32     // Value of maxExtendedFunction() at detection time (highest extended CPUID function)
}
163
// Low-level instruction hooks. They are declared as function variables and
// are not assigned anywhere in this file; init calls initCPU before the first
// use (presumably initCPU installs the platform implementations — confirm).

// cpuid executes CPUID for function op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex executes CPUID for function op with sub-function op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm executes RDTSCP. Callers in this file combine eax (low) and edx
// (high) into the 64-bit counter and read the auxiliary value from ecx.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
168
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data. Detection runs once in
// init, so reading the fields and flag helpers of CPU only looks at the
// values stored here.
var CPU CPUInfo
175
// init prepares the low-level instruction hooks via initCPU and then runs the
// first detection, so the exported CPU variable is populated before any user
// code runs. The order matters: Detect calls cpuid and friends.
func init() {
	initCPU()
	Detect()
}
180
181// Detect will re-detect current CPU info.
182// This will replace the content of the exported CPU variable.
183//
184// Unless you expect the CPU to change while you are running your program
185// you should not need to call this function.
186// If you call this, you must ensure that no other goroutine is accessing the
187// exported CPU variable.
188func Detect() {
189	CPU.maxFunc = maxFunctionID()
190	CPU.maxExFunc = maxExtendedFunction()
191	CPU.BrandName = brandName()
192	CPU.CacheLine = cacheLine()
193	CPU.Family, CPU.Model = familyModel()
194	CPU.Features = support()
195	CPU.SGX = sgx(CPU.Features&SGX != 0)
196	CPU.ThreadsPerCore = threadsPerCore()
197	CPU.LogicalCores = logicalCores()
198	CPU.PhysicalCores = physicalCores()
199	CPU.VendorID = vendorID()
200	CPU.cacheSize()
201}
202
203// Generated here: http://play.golang.org/p/BxFH2Gdc0G
204
205// Cmov indicates support of CMOV instructions
206func (c CPUInfo) Cmov() bool {
207	return c.Features&CMOV != 0
208}
209
210// Amd3dnow indicates support of AMD 3DNOW! instructions
211func (c CPUInfo) Amd3dnow() bool {
212	return c.Features&AMD3DNOW != 0
213}
214
215// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
216func (c CPUInfo) Amd3dnowExt() bool {
217	return c.Features&AMD3DNOWEXT != 0
218}
219
220// MMX indicates support of MMX instructions
221func (c CPUInfo) MMX() bool {
222	return c.Features&MMX != 0
223}
224
225// MMXExt indicates support of MMXEXT instructions
226// (SSE integer functions or AMD MMX ext)
227func (c CPUInfo) MMXExt() bool {
228	return c.Features&MMXEXT != 0
229}
230
231// SSE indicates support of SSE instructions
232func (c CPUInfo) SSE() bool {
233	return c.Features&SSE != 0
234}
235
236// SSE2 indicates support of SSE 2 instructions
237func (c CPUInfo) SSE2() bool {
238	return c.Features&SSE2 != 0
239}
240
241// SSE3 indicates support of SSE 3 instructions
242func (c CPUInfo) SSE3() bool {
243	return c.Features&SSE3 != 0
244}
245
246// SSSE3 indicates support of SSSE 3 instructions
247func (c CPUInfo) SSSE3() bool {
248	return c.Features&SSSE3 != 0
249}
250
251// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
252func (c CPUInfo) SSE4() bool {
253	return c.Features&SSE4 != 0
254}
255
256// SSE42 indicates support of SSE4.2 instructions
257func (c CPUInfo) SSE42() bool {
258	return c.Features&SSE42 != 0
259}
260
261// AVX indicates support of AVX instructions
262// and operating system support of AVX instructions
263func (c CPUInfo) AVX() bool {
264	return c.Features&AVX != 0
265}
266
267// AVX2 indicates support of AVX2 instructions
268func (c CPUInfo) AVX2() bool {
269	return c.Features&AVX2 != 0
270}
271
272// FMA3 indicates support of FMA3 instructions
273func (c CPUInfo) FMA3() bool {
274	return c.Features&FMA3 != 0
275}
276
277// FMA4 indicates support of FMA4 instructions
278func (c CPUInfo) FMA4() bool {
279	return c.Features&FMA4 != 0
280}
281
282// XOP indicates support of XOP instructions
283func (c CPUInfo) XOP() bool {
284	return c.Features&XOP != 0
285}
286
287// F16C indicates support of F16C instructions
288func (c CPUInfo) F16C() bool {
289	return c.Features&F16C != 0
290}
291
292// BMI1 indicates support of BMI1 instructions
293func (c CPUInfo) BMI1() bool {
294	return c.Features&BMI1 != 0
295}
296
297// BMI2 indicates support of BMI2 instructions
298func (c CPUInfo) BMI2() bool {
299	return c.Features&BMI2 != 0
300}
301
302// TBM indicates support of TBM instructions
303// (AMD Trailing Bit Manipulation)
304func (c CPUInfo) TBM() bool {
305	return c.Features&TBM != 0
306}
307
308// Lzcnt indicates support of LZCNT instruction
309func (c CPUInfo) Lzcnt() bool {
310	return c.Features&LZCNT != 0
311}
312
313// Popcnt indicates support of POPCNT instruction
314func (c CPUInfo) Popcnt() bool {
315	return c.Features&POPCNT != 0
316}
317
318// HTT indicates the processor has Hyperthreading enabled
319func (c CPUInfo) HTT() bool {
320	return c.Features&HTT != 0
321}
322
323// SSE2Slow indicates that SSE2 may be slow on this processor
324func (c CPUInfo) SSE2Slow() bool {
325	return c.Features&SSE2SLOW != 0
326}
327
328// SSE3Slow indicates that SSE3 may be slow on this processor
329func (c CPUInfo) SSE3Slow() bool {
330	return c.Features&SSE3SLOW != 0
331}
332
333// AesNi indicates support of AES-NI instructions
334// (Advanced Encryption Standard New Instructions)
335func (c CPUInfo) AesNi() bool {
336	return c.Features&AESNI != 0
337}
338
339// Clmul indicates support of CLMUL instructions
340// (Carry-less Multiplication)
341func (c CPUInfo) Clmul() bool {
342	return c.Features&CLMUL != 0
343}
344
345// NX indicates support of NX (No-Execute) bit
346func (c CPUInfo) NX() bool {
347	return c.Features&NX != 0
348}
349
350// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
351func (c CPUInfo) SSE4A() bool {
352	return c.Features&SSE4A != 0
353}
354
355// HLE indicates support of Hardware Lock Elision
356func (c CPUInfo) HLE() bool {
357	return c.Features&HLE != 0
358}
359
360// RTM indicates support of Restricted Transactional Memory
361func (c CPUInfo) RTM() bool {
362	return c.Features&RTM != 0
363}
364
365// Rdrand indicates support of RDRAND instruction is available
366func (c CPUInfo) Rdrand() bool {
367	return c.Features&RDRAND != 0
368}
369
370// Rdseed indicates support of RDSEED instruction is available
371func (c CPUInfo) Rdseed() bool {
372	return c.Features&RDSEED != 0
373}
374
375// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
376func (c CPUInfo) ADX() bool {
377	return c.Features&ADX != 0
378}
379
380// SHA indicates support of Intel SHA Extensions
381func (c CPUInfo) SHA() bool {
382	return c.Features&SHA != 0
383}
384
385// AVX512F indicates support of AVX-512 Foundation
386func (c CPUInfo) AVX512F() bool {
387	return c.Features&AVX512F != 0
388}
389
390// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
391func (c CPUInfo) AVX512DQ() bool {
392	return c.Features&AVX512DQ != 0
393}
394
395// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
396func (c CPUInfo) AVX512IFMA() bool {
397	return c.Features&AVX512IFMA != 0
398}
399
400// AVX512PF indicates support of AVX-512 Prefetch Instructions
401func (c CPUInfo) AVX512PF() bool {
402	return c.Features&AVX512PF != 0
403}
404
405// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
406func (c CPUInfo) AVX512ER() bool {
407	return c.Features&AVX512ER != 0
408}
409
410// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
411func (c CPUInfo) AVX512CD() bool {
412	return c.Features&AVX512CD != 0
413}
414
415// AVX512BW indicates support of AVX-512 Byte and Word Instructions
416func (c CPUInfo) AVX512BW() bool {
417	return c.Features&AVX512BW != 0
418}
419
420// AVX512VL indicates support of AVX-512 Vector Length Extensions
421func (c CPUInfo) AVX512VL() bool {
422	return c.Features&AVX512VL != 0
423}
424
425// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
426func (c CPUInfo) AVX512VBMI() bool {
427	return c.Features&AVX512VBMI != 0
428}
429
430// MPX indicates support of Intel MPX (Memory Protection Extensions)
431func (c CPUInfo) MPX() bool {
432	return c.Features&MPX != 0
433}
434
435// ERMS indicates support of Enhanced REP MOVSB/STOSB
436func (c CPUInfo) ERMS() bool {
437	return c.Features&ERMS != 0
438}
439
440func (c CPUInfo) RDTSCP() bool {
441	return c.Features&RDTSCP != 0
442}
443
444func (c CPUInfo) CX16() bool {
445	return c.Features&CX16 != 0
446}
447
448// Atom indicates an Atom processor
449func (c CPUInfo) Atom() bool {
450	return c.Features&ATOM != 0
451}
452
// Intel returns true if vendor is recognized as Intel,
// i.e. the stored VendorID equals the Intel constant.
func (c CPUInfo) Intel() bool {
	return c.VendorID == Intel
}
457
// AMD returns true if vendor is recognized as AMD,
// i.e. the stored VendorID equals the AMD constant.
func (c CPUInfo) AMD() bool {
	return c.VendorID == AMD
}
462
// Transmeta returns true if vendor is recognized as Transmeta,
// i.e. the stored VendorID equals the Transmeta constant.
func (c CPUInfo) Transmeta() bool {
	return c.VendorID == Transmeta
}
467
// NSC returns true if vendor is recognized as National Semiconductor,
// i.e. the stored VendorID equals the NSC constant.
func (c CPUInfo) NSC() bool {
	return c.VendorID == NSC
}
472
// VIA returns true if vendor is recognized as VIA,
// i.e. the stored VendorID equals the VIA constant.
func (c CPUInfo) VIA() bool {
	return c.VendorID == VIA
}
477
478// RTCounter returns the 64-bit time-stamp counter
479// Uses the RDTSCP instruction. The value 0 is returned
480// if the CPU does not support the instruction.
481func (c CPUInfo) RTCounter() uint64 {
482	if !c.RDTSCP() {
483		return 0
484	}
485	a, _, _, d := rdtscpAsm()
486	return uint64(a) | (uint64(d) << 32)
487}
488
489// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
490// This variable is OS dependent, but on Linux contains information
491// about the current cpu/core the code is running on.
492// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
493func (c CPUInfo) Ia32TscAux() uint32 {
494	if !c.RDTSCP() {
495		return 0
496	}
497	_, _, ecx, _ := rdtscpAsm()
498	return ecx
499}
500
501// LogicalCPU will return the Logical CPU the code is currently executing on.
502// This is likely to change when the OS re-schedules the running thread
503// to another CPU.
504// If the current core cannot be detected, -1 will be returned.
505func (c CPUInfo) LogicalCPU() int {
506	if c.maxFunc < 1 {
507		return -1
508	}
509	_, ebx, _, _ := cpuid(1)
510	return int(ebx >> 24)
511}
512
513// VM Will return true if the cpu id indicates we are in
514// a virtual machine. This is only a hint, and will very likely
515// have many false negatives.
516func (c CPUInfo) VM() bool {
517	switch c.VendorID {
518	case MSVM, KVM, VMware, XenHVM:
519		return true
520	}
521	return false
522}
523
// Flags contains detected CPU features and characteristics.
// It is a bit set: each feature constant (CMOV, SSE2, AVX, ...) occupies one bit.
type Flags uint64
526
527// String returns a string representation of the detected
528// CPU features.
529func (f Flags) String() string {
530	return strings.Join(f.Strings(), ",")
531}
532
533// Strings returns and array of the detected features.
534func (f Flags) Strings() []string {
535	s := support()
536	r := make([]string, 0, 20)
537	for i := uint(0); i < 64; i++ {
538		key := Flags(1 << i)
539		val := flagNames[key]
540		if s&key != 0 {
541			r = append(r, val)
542		}
543	}
544	return r
545}
546
547func maxExtendedFunction() uint32 {
548	eax, _, _, _ := cpuid(0x80000000)
549	return eax
550}
551
552func maxFunctionID() uint32 {
553	a, _, _, _ := cpuid(0)
554	return a
555}
556
557func brandName() string {
558	if maxExtendedFunction() >= 0x80000004 {
559		v := make([]uint32, 0, 48)
560		for i := uint32(0); i < 3; i++ {
561			a, b, c, d := cpuid(0x80000002 + i)
562			v = append(v, a, b, c, d)
563		}
564		return strings.Trim(string(valAsString(v...)), " ")
565	}
566	return "unknown"
567}
568
569func threadsPerCore() int {
570	mfi := maxFunctionID()
571	if mfi < 0x4 || vendorID() != Intel {
572		return 1
573	}
574
575	if mfi < 0xb {
576		_, b, _, d := cpuid(1)
577		if (d & (1 << 28)) != 0 {
578			// v will contain logical core count
579			v := (b >> 16) & 255
580			if v > 1 {
581				a4, _, _, _ := cpuid(4)
582				// physical cores
583				v2 := (a4 >> 26) + 1
584				if v2 > 0 {
585					return int(v) / int(v2)
586				}
587			}
588		}
589		return 1
590	}
591	_, b, _, _ := cpuidex(0xb, 0)
592	if b&0xffff == 0 {
593		return 1
594	}
595	return int(b & 0xffff)
596}
597
598func logicalCores() int {
599	mfi := maxFunctionID()
600	switch vendorID() {
601	case Intel:
602		// Use this on old Intel processors
603		if mfi < 0xb {
604			if mfi < 1 {
605				return 0
606			}
607			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
608			// that can be assigned to logical processors in a physical package.
609			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
610			_, ebx, _, _ := cpuid(1)
611			logical := (ebx >> 16) & 0xff
612			return int(logical)
613		}
614		_, b, _, _ := cpuidex(0xb, 1)
615		return int(b & 0xffff)
616	case AMD:
617		_, b, _, _ := cpuid(1)
618		return int((b >> 16) & 0xff)
619	default:
620		return 0
621	}
622}
623
624func familyModel() (int, int) {
625	if maxFunctionID() < 0x1 {
626		return 0, 0
627	}
628	eax, _, _, _ := cpuid(1)
629	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
630	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
631	return int(family), int(model)
632}
633
634func physicalCores() int {
635	switch vendorID() {
636	case Intel:
637		return logicalCores() / threadsPerCore()
638	case AMD:
639		if maxExtendedFunction() >= 0x80000008 {
640			_, _, c, _ := cpuid(0x80000008)
641			return int(c&0xff) + 1
642		}
643	}
644	return 0
645}
646
// vendorMapping translates the 12-byte CPUID vendor string into a Vendor.
// Strings that are not listed here are reported as Other by vendorID.
//
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
}
662
663func vendorID() Vendor {
664	_, b, c, d := cpuid(0)
665	v := valAsString(b, d, c)
666	vend, ok := vendorMapping[string(v)]
667	if !ok {
668		return Other
669	}
670	return vend
671}
672
673func cacheLine() int {
674	if maxFunctionID() < 0x1 {
675		return 0
676	}
677
678	_, ebx, _, _ := cpuid(1)
679	cache := (ebx & 0xff00) >> 5 // cflush size
680	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
681		_, _, ecx, _ := cpuid(0x80000006)
682		cache = ecx & 0xff // cacheline size
683	}
684	// TODO: Read from Cache and TLB Information
685	return int(cache)
686}
687
688func (c *CPUInfo) cacheSize() {
689	c.Cache.L1D = -1
690	c.Cache.L1I = -1
691	c.Cache.L2 = -1
692	c.Cache.L3 = -1
693	vendor := vendorID()
694	switch vendor {
695	case Intel:
696		if maxFunctionID() < 4 {
697			return
698		}
699		for i := uint32(0); ; i++ {
700			eax, ebx, ecx, _ := cpuidex(4, i)
701			cacheType := eax & 15
702			if cacheType == 0 {
703				break
704			}
705			cacheLevel := (eax >> 5) & 7
706			coherency := int(ebx&0xfff) + 1
707			partitions := int((ebx>>12)&0x3ff) + 1
708			associativity := int((ebx>>22)&0x3ff) + 1
709			sets := int(ecx) + 1
710			size := associativity * partitions * coherency * sets
711			switch cacheLevel {
712			case 1:
713				if cacheType == 1 {
714					// 1 = Data Cache
715					c.Cache.L1D = size
716				} else if cacheType == 2 {
717					// 2 = Instruction Cache
718					c.Cache.L1I = size
719				} else {
720					if c.Cache.L1D < 0 {
721						c.Cache.L1I = size
722					}
723					if c.Cache.L1I < 0 {
724						c.Cache.L1I = size
725					}
726				}
727			case 2:
728				c.Cache.L2 = size
729			case 3:
730				c.Cache.L3 = size
731			}
732		}
733	case AMD:
734		// Untested.
735		if maxExtendedFunction() < 0x80000005 {
736			return
737		}
738		_, _, ecx, edx := cpuid(0x80000005)
739		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
740		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
741
742		if maxExtendedFunction() < 0x80000006 {
743			return
744		}
745		_, _, ecx, _ = cpuid(0x80000006)
746		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
747	}
748
749	return
750}
751
// SGXSupport describes the Software Guard Extensions support of the CPU.
// Apart from Available, the fields are only filled in when the SGX feature
// flag is set; they are read from CPUID function 0x12, sub-function 0.
type SGXSupport struct {
	Available           bool  // The SGX feature flag is set
	SGX1Supported       bool  // Bit 0 of EAX
	SGX2Supported       bool  // Bit 1 of EAX
	MaxEnclaveSizeNot64 int64 // 2 to the power of EDX[7:0]; presumably the maximum enclave size outside 64-bit mode — confirm
	MaxEnclaveSize64    int64 // 2 to the power of EDX[15:8]; presumably the maximum enclave size in 64-bit mode — confirm
}
759
760func sgx(available bool) (rval SGXSupport) {
761	rval.Available = available
762
763	if !available {
764		return
765	}
766
767	a, _, _, d := cpuidex(0x12, 0)
768	rval.SGX1Supported = a&0x01 != 0
769	rval.SGX2Supported = a&0x02 != 0
770	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
771	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
772
773	return
774}
775
776func support() Flags {
777	mfi := maxFunctionID()
778	vend := vendorID()
779	if mfi < 0x1 {
780		return 0
781	}
782	rval := uint64(0)
783	_, _, c, d := cpuid(1)
784	if (d & (1 << 15)) != 0 {
785		rval |= CMOV
786	}
787	if (d & (1 << 23)) != 0 {
788		rval |= MMX
789	}
790	if (d & (1 << 25)) != 0 {
791		rval |= MMXEXT
792	}
793	if (d & (1 << 25)) != 0 {
794		rval |= SSE
795	}
796	if (d & (1 << 26)) != 0 {
797		rval |= SSE2
798	}
799	if (c & 1) != 0 {
800		rval |= SSE3
801	}
802	if (c & 0x00000200) != 0 {
803		rval |= SSSE3
804	}
805	if (c & 0x00080000) != 0 {
806		rval |= SSE4
807	}
808	if (c & 0x00100000) != 0 {
809		rval |= SSE42
810	}
811	if (c & (1 << 25)) != 0 {
812		rval |= AESNI
813	}
814	if (c & (1 << 1)) != 0 {
815		rval |= CLMUL
816	}
817	if c&(1<<23) != 0 {
818		rval |= POPCNT
819	}
820	if c&(1<<30) != 0 {
821		rval |= RDRAND
822	}
823	if c&(1<<29) != 0 {
824		rval |= F16C
825	}
826	if c&(1<<13) != 0 {
827		rval |= CX16
828	}
829	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
830		if threadsPerCore() > 1 {
831			rval |= HTT
832		}
833	}
834
835	// Check XGETBV, OXSAVE and AVX bits
836	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
837		// Check for OS support
838		eax, _ := xgetbv(0)
839		if (eax & 0x6) == 0x6 {
840			rval |= AVX
841			if (c & 0x00001000) != 0 {
842				rval |= FMA3
843			}
844		}
845	}
846
847	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
848	if mfi >= 7 {
849		_, ebx, ecx, _ := cpuidex(7, 0)
850		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
851			rval |= AVX2
852		}
853		if (ebx & 0x00000008) != 0 {
854			rval |= BMI1
855			if (ebx & 0x00000100) != 0 {
856				rval |= BMI2
857			}
858		}
859		if ebx&(1<<2) != 0 {
860			rval |= SGX
861		}
862		if ebx&(1<<4) != 0 {
863			rval |= HLE
864		}
865		if ebx&(1<<9) != 0 {
866			rval |= ERMS
867		}
868		if ebx&(1<<11) != 0 {
869			rval |= RTM
870		}
871		if ebx&(1<<14) != 0 {
872			rval |= MPX
873		}
874		if ebx&(1<<18) != 0 {
875			rval |= RDSEED
876		}
877		if ebx&(1<<19) != 0 {
878			rval |= ADX
879		}
880		if ebx&(1<<29) != 0 {
881			rval |= SHA
882		}
883
884		// Only detect AVX-512 features if XGETBV is supported
885		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
886			// Check for OS support
887			eax, _ := xgetbv(0)
888
889			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
890			// ZMM16-ZMM31 state are enabled by OS)
891			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
892			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
893				if ebx&(1<<16) != 0 {
894					rval |= AVX512F
895				}
896				if ebx&(1<<17) != 0 {
897					rval |= AVX512DQ
898				}
899				if ebx&(1<<21) != 0 {
900					rval |= AVX512IFMA
901				}
902				if ebx&(1<<26) != 0 {
903					rval |= AVX512PF
904				}
905				if ebx&(1<<27) != 0 {
906					rval |= AVX512ER
907				}
908				if ebx&(1<<28) != 0 {
909					rval |= AVX512CD
910				}
911				if ebx&(1<<30) != 0 {
912					rval |= AVX512BW
913				}
914				if ebx&(1<<31) != 0 {
915					rval |= AVX512VL
916				}
917				// ecx
918				if ecx&(1<<1) != 0 {
919					rval |= AVX512VBMI
920				}
921			}
922		}
923	}
924
925	if maxExtendedFunction() >= 0x80000001 {
926		_, _, c, d := cpuid(0x80000001)
927		if (c & (1 << 5)) != 0 {
928			rval |= LZCNT
929			rval |= POPCNT
930		}
931		if (d & (1 << 31)) != 0 {
932			rval |= AMD3DNOW
933		}
934		if (d & (1 << 30)) != 0 {
935			rval |= AMD3DNOWEXT
936		}
937		if (d & (1 << 23)) != 0 {
938			rval |= MMX
939		}
940		if (d & (1 << 22)) != 0 {
941			rval |= MMXEXT
942		}
943		if (c & (1 << 6)) != 0 {
944			rval |= SSE4A
945		}
946		if d&(1<<20) != 0 {
947			rval |= NX
948		}
949		if d&(1<<27) != 0 {
950			rval |= RDTSCP
951		}
952
953		/* Allow for selectively disabling SSE2 functions on AMD processors
954		   with SSE2 support but not SSE4a. This includes Athlon64, some
955		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
956		   than SSE2 often enough to utilize this special-case flag.
957		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
958		   so that SSE2 is used unless explicitly disabled by checking
959		   AV_CPU_FLAG_SSE2SLOW. */
960		if vendorID() != Intel &&
961			rval&SSE2 != 0 && (c&0x00000040) == 0 {
962			rval |= SSE2SLOW
963		}
964
965		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
966		 * used unless the OS has AVX support. */
967		if (rval & AVX) != 0 {
968			if (c & 0x00000800) != 0 {
969				rval |= XOP
970			}
971			if (c & 0x00010000) != 0 {
972				rval |= FMA4
973			}
974		}
975
976		if vendorID() == Intel {
977			family, model := familyModel()
978			if family == 6 && (model == 9 || model == 13 || model == 14) {
979				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
980				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
981				 * usually slower than mmx. */
982				if (rval & SSE2) != 0 {
983					rval |= SSE2SLOW
984				}
985				if (rval & SSE3) != 0 {
986					rval |= SSE3SLOW
987				}
988			}
989			/* The Atom processor has SSSE3 support, which is useful in many cases,
990			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
991			 * on the Atom, but is generally faster on other processors supporting
992			 * SSSE3. This flag allows for selectively disabling certain SSSE3
993			 * functions on the Atom. */
994			if family == 6 && model == 28 {
995				rval |= ATOM
996			}
997		}
998	}
999	return Flags(rval)
1000}
1001
// valAsString converts register values into bytes, least significant byte of
// each value first, in argument order. The result is cut off immediately
// before the first zero byte; without a zero byte all 4*len(values) bytes
// are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for i, v := range values {
		word := out[i*4 : i*4+4]
		// Unpack the register, lowest byte first.
		for j := range word {
			word[j] = byte(v >> (8 * uint(j)))
		}
		// Stop at the first terminator inside this register.
		for j, b := range word {
			if b == 0 {
				return out[:i*4+j]
			}
		}
	}
	return out
}
1023