1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import "strings"
14
// Vendor is a representation of a CPU vendor.
// Values are produced by vendorID, which looks the CPUID vendor string up
// in vendorMapping.
type Vendor int

// Known CPU (and hypervisor) vendors.
const (
	Other     Vendor = iota // Vendor string not present in vendorMapping
	Intel                   // "GenuineIntel"
	AMD                     // "AuthenticAMD" or "AMDisbetter!"
	VIA                     // "CentaurHauls" or "VIA VIA VIA "
	Transmeta               // "TransmetaCPU" or "GenuineTMx86"
	NSC                     // National Semiconductor ("Geode by NSC")
	KVM                     // Kernel-based Virtual Machine
	MSVM                    // Microsoft Hyper-V or Windows Virtual PC
	VMware                  // "VMwareVMware"
	XenHVM                  // "XenVMMXenVMM"
	Bhyve                   // "bhyve bhyve "
	Hygon                   // "HygonGenuine"
)
32
// CPU feature flags. Each constant occupies its own bit (1 << iota), so the
// values can be combined and tested against a Flags value with bitwise
// operators. The order of the entries determines the bit positions.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions
	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP                   // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
89
// flagNames maps every single-bit feature flag to the name that
// Flags.Strings emits for it.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions
	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
147
// CPUInfo contains information about the detected system CPU.
// All fields are (re)populated by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU, or "unknown" if the brand string leaves are unavailable
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details; only Available is filled in when the SGX feature flag is not set
	maxFunc   uint32     // EAX returned by CPUID leaf 0 (see maxFunctionID)
	maxExFunc uint32     // EAX returned by CPUID leaf 0x80000000 (see maxExtendedFunction)
}
169
// Low-level CPU query hooks. They are declared as function variables and are
// not assigned anywhere in this file.
// NOTE(review): presumably initCPU installs the platform-specific
// implementations before Detect runs — confirm against the per-arch files.

// cpuid executes CPUID for leaf op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex executes CPUID for leaf op with sub-leaf op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm executes RDTSCP. RTCounter combines eax (low) and edx (high)
// into the counter value; Ia32TscAux returns ecx.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
174
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data; reading it does not
// trigger a new detection.
var CPU CPUInfo
181
// init runs initCPU followed by Detect at package initialization, so the
// exported CPU variable is populated once the package has been loaded.
// NOTE(review): initCPU is defined outside this file; presumably it wires up
// the cpuid/cpuidex/xgetbv/rdtscpAsm function variables that Detect needs,
// which is why it must run first — confirm.
func init() {
	initCPU()
	Detect()
}
186
187// Detect will re-detect current CPU info.
188// This will replace the content of the exported CPU variable.
189//
190// Unless you expect the CPU to change while you are running your program
191// you should not need to call this function.
192// If you call this, you must ensure that no other goroutine is accessing the
193// exported CPU variable.
194func Detect() {
195	CPU.maxFunc = maxFunctionID()
196	CPU.maxExFunc = maxExtendedFunction()
197	CPU.BrandName = brandName()
198	CPU.CacheLine = cacheLine()
199	CPU.Family, CPU.Model = familyModel()
200	CPU.Features = support()
201	CPU.SGX = hasSGX(CPU.Features&SGX != 0)
202	CPU.ThreadsPerCore = threadsPerCore()
203	CPU.LogicalCores = logicalCores()
204	CPU.PhysicalCores = physicalCores()
205	CPU.VendorID = vendorID()
206	CPU.cacheSize()
207}
208
209// Generated here: http://play.golang.org/p/BxFH2Gdc0G
210
211// Cmov indicates support of CMOV instructions
212func (c CPUInfo) Cmov() bool {
213	return c.Features&CMOV != 0
214}
215
216// Amd3dnow indicates support of AMD 3DNOW! instructions
217func (c CPUInfo) Amd3dnow() bool {
218	return c.Features&AMD3DNOW != 0
219}
220
221// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
222func (c CPUInfo) Amd3dnowExt() bool {
223	return c.Features&AMD3DNOWEXT != 0
224}
225
226// MMX indicates support of MMX instructions
227func (c CPUInfo) MMX() bool {
228	return c.Features&MMX != 0
229}
230
231// MMXExt indicates support of MMXEXT instructions
232// (SSE integer functions or AMD MMX ext)
233func (c CPUInfo) MMXExt() bool {
234	return c.Features&MMXEXT != 0
235}
236
237// SSE indicates support of SSE instructions
238func (c CPUInfo) SSE() bool {
239	return c.Features&SSE != 0
240}
241
242// SSE2 indicates support of SSE 2 instructions
243func (c CPUInfo) SSE2() bool {
244	return c.Features&SSE2 != 0
245}
246
247// SSE3 indicates support of SSE 3 instructions
248func (c CPUInfo) SSE3() bool {
249	return c.Features&SSE3 != 0
250}
251
252// SSSE3 indicates support of SSSE 3 instructions
253func (c CPUInfo) SSSE3() bool {
254	return c.Features&SSSE3 != 0
255}
256
257// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
258func (c CPUInfo) SSE4() bool {
259	return c.Features&SSE4 != 0
260}
261
262// SSE42 indicates support of SSE4.2 instructions
263func (c CPUInfo) SSE42() bool {
264	return c.Features&SSE42 != 0
265}
266
267// AVX indicates support of AVX instructions
268// and operating system support of AVX instructions
269func (c CPUInfo) AVX() bool {
270	return c.Features&AVX != 0
271}
272
273// AVX2 indicates support of AVX2 instructions
274func (c CPUInfo) AVX2() bool {
275	return c.Features&AVX2 != 0
276}
277
278// FMA3 indicates support of FMA3 instructions
279func (c CPUInfo) FMA3() bool {
280	return c.Features&FMA3 != 0
281}
282
283// FMA4 indicates support of FMA4 instructions
284func (c CPUInfo) FMA4() bool {
285	return c.Features&FMA4 != 0
286}
287
288// XOP indicates support of XOP instructions
289func (c CPUInfo) XOP() bool {
290	return c.Features&XOP != 0
291}
292
293// F16C indicates support of F16C instructions
294func (c CPUInfo) F16C() bool {
295	return c.Features&F16C != 0
296}
297
298// BMI1 indicates support of BMI1 instructions
299func (c CPUInfo) BMI1() bool {
300	return c.Features&BMI1 != 0
301}
302
303// BMI2 indicates support of BMI2 instructions
304func (c CPUInfo) BMI2() bool {
305	return c.Features&BMI2 != 0
306}
307
308// TBM indicates support of TBM instructions
309// (AMD Trailing Bit Manipulation)
310func (c CPUInfo) TBM() bool {
311	return c.Features&TBM != 0
312}
313
314// Lzcnt indicates support of LZCNT instruction
315func (c CPUInfo) Lzcnt() bool {
316	return c.Features&LZCNT != 0
317}
318
319// Popcnt indicates support of POPCNT instruction
320func (c CPUInfo) Popcnt() bool {
321	return c.Features&POPCNT != 0
322}
323
324// HTT indicates the processor has Hyperthreading enabled
325func (c CPUInfo) HTT() bool {
326	return c.Features&HTT != 0
327}
328
329// SSE2Slow indicates that SSE2 may be slow on this processor
330func (c CPUInfo) SSE2Slow() bool {
331	return c.Features&SSE2SLOW != 0
332}
333
334// SSE3Slow indicates that SSE3 may be slow on this processor
335func (c CPUInfo) SSE3Slow() bool {
336	return c.Features&SSE3SLOW != 0
337}
338
339// AesNi indicates support of AES-NI instructions
340// (Advanced Encryption Standard New Instructions)
341func (c CPUInfo) AesNi() bool {
342	return c.Features&AESNI != 0
343}
344
345// Clmul indicates support of CLMUL instructions
346// (Carry-less Multiplication)
347func (c CPUInfo) Clmul() bool {
348	return c.Features&CLMUL != 0
349}
350
351// NX indicates support of NX (No-Execute) bit
352func (c CPUInfo) NX() bool {
353	return c.Features&NX != 0
354}
355
356// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
357func (c CPUInfo) SSE4A() bool {
358	return c.Features&SSE4A != 0
359}
360
361// HLE indicates support of Hardware Lock Elision
362func (c CPUInfo) HLE() bool {
363	return c.Features&HLE != 0
364}
365
366// RTM indicates support of Restricted Transactional Memory
367func (c CPUInfo) RTM() bool {
368	return c.Features&RTM != 0
369}
370
371// Rdrand indicates support of RDRAND instruction is available
372func (c CPUInfo) Rdrand() bool {
373	return c.Features&RDRAND != 0
374}
375
376// Rdseed indicates support of RDSEED instruction is available
377func (c CPUInfo) Rdseed() bool {
378	return c.Features&RDSEED != 0
379}
380
381// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
382func (c CPUInfo) ADX() bool {
383	return c.Features&ADX != 0
384}
385
386// SHA indicates support of Intel SHA Extensions
387func (c CPUInfo) SHA() bool {
388	return c.Features&SHA != 0
389}
390
391// AVX512F indicates support of AVX-512 Foundation
392func (c CPUInfo) AVX512F() bool {
393	return c.Features&AVX512F != 0
394}
395
396// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
397func (c CPUInfo) AVX512DQ() bool {
398	return c.Features&AVX512DQ != 0
399}
400
401// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
402func (c CPUInfo) AVX512IFMA() bool {
403	return c.Features&AVX512IFMA != 0
404}
405
406// AVX512PF indicates support of AVX-512 Prefetch Instructions
407func (c CPUInfo) AVX512PF() bool {
408	return c.Features&AVX512PF != 0
409}
410
411// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
412func (c CPUInfo) AVX512ER() bool {
413	return c.Features&AVX512ER != 0
414}
415
416// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
417func (c CPUInfo) AVX512CD() bool {
418	return c.Features&AVX512CD != 0
419}
420
421// AVX512BW indicates support of AVX-512 Byte and Word Instructions
422func (c CPUInfo) AVX512BW() bool {
423	return c.Features&AVX512BW != 0
424}
425
426// AVX512VL indicates support of AVX-512 Vector Length Extensions
427func (c CPUInfo) AVX512VL() bool {
428	return c.Features&AVX512VL != 0
429}
430
431// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
432func (c CPUInfo) AVX512VBMI() bool {
433	return c.Features&AVX512VBMI != 0
434}
435
436// MPX indicates support of Intel MPX (Memory Protection Extensions)
437func (c CPUInfo) MPX() bool {
438	return c.Features&MPX != 0
439}
440
441// ERMS indicates support of Enhanced REP MOVSB/STOSB
442func (c CPUInfo) ERMS() bool {
443	return c.Features&ERMS != 0
444}
445
446// RDTSCP Instruction is available.
447func (c CPUInfo) RDTSCP() bool {
448	return c.Features&RDTSCP != 0
449}
450
451// CX16 indicates if CMPXCHG16B instruction is available.
452func (c CPUInfo) CX16() bool {
453	return c.Features&CX16 != 0
454}
455
456// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
457// So TSX simply checks that.
458func (c CPUInfo) TSX() bool {
459	return c.Features&(HLE|RTM) == HLE|RTM
460}
461
462// Atom indicates an Atom processor
463func (c CPUInfo) Atom() bool {
464	return c.Features&ATOM != 0
465}
466
467// Intel returns true if vendor is recognized as Intel
468func (c CPUInfo) Intel() bool {
469	return c.VendorID == Intel
470}
471
472// AMD returns true if vendor is recognized as AMD
473func (c CPUInfo) AMD() bool {
474	return c.VendorID == AMD
475}
476
477// Hygon returns true if vendor is recognized as Hygon
478func (c CPUInfo) Hygon() bool {
479	return c.VendorID == Hygon
480}
481
482// Transmeta returns true if vendor is recognized as Transmeta
483func (c CPUInfo) Transmeta() bool {
484	return c.VendorID == Transmeta
485}
486
487// NSC returns true if vendor is recognized as National Semiconductor
488func (c CPUInfo) NSC() bool {
489	return c.VendorID == NSC
490}
491
492// VIA returns true if vendor is recognized as VIA
493func (c CPUInfo) VIA() bool {
494	return c.VendorID == VIA
495}
496
497// RTCounter returns the 64-bit time-stamp counter
498// Uses the RDTSCP instruction. The value 0 is returned
499// if the CPU does not support the instruction.
500func (c CPUInfo) RTCounter() uint64 {
501	if !c.RDTSCP() {
502		return 0
503	}
504	a, _, _, d := rdtscpAsm()
505	return uint64(a) | (uint64(d) << 32)
506}
507
508// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
509// This variable is OS dependent, but on Linux contains information
510// about the current cpu/core the code is running on.
511// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
512func (c CPUInfo) Ia32TscAux() uint32 {
513	if !c.RDTSCP() {
514		return 0
515	}
516	_, _, ecx, _ := rdtscpAsm()
517	return ecx
518}
519
520// LogicalCPU will return the Logical CPU the code is currently executing on.
521// This is likely to change when the OS re-schedules the running thread
522// to another CPU.
523// If the current core cannot be detected, -1 will be returned.
524func (c CPUInfo) LogicalCPU() int {
525	if c.maxFunc < 1 {
526		return -1
527	}
528	_, ebx, _, _ := cpuid(1)
529	return int(ebx >> 24)
530}
531
532// VM Will return true if the cpu id indicates we are in
533// a virtual machine. This is only a hint, and will very likely
534// have many false negatives.
535func (c CPUInfo) VM() bool {
536	switch c.VendorID {
537	case MSVM, KVM, VMware, XenHVM, Bhyve:
538		return true
539	}
540	return false
541}
542
// Flags contains detected CPU features and characteristics as a bit set;
// each bit corresponds to one of the feature flag constants (CMOV, NX, ...).
type Flags uint64
545
546// String returns a string representation of the detected
547// CPU features.
548func (f Flags) String() string {
549	return strings.Join(f.Strings(), ",")
550}
551
552// Strings returns and array of the detected features.
553func (f Flags) Strings() []string {
554	s := support()
555	r := make([]string, 0, 20)
556	for i := uint(0); i < 64; i++ {
557		key := Flags(1 << i)
558		val := flagNames[key]
559		if s&key != 0 {
560			r = append(r, val)
561		}
562	}
563	return r
564}
565
566func maxExtendedFunction() uint32 {
567	eax, _, _, _ := cpuid(0x80000000)
568	return eax
569}
570
571func maxFunctionID() uint32 {
572	a, _, _, _ := cpuid(0)
573	return a
574}
575
576func brandName() string {
577	if maxExtendedFunction() >= 0x80000004 {
578		v := make([]uint32, 0, 48)
579		for i := uint32(0); i < 3; i++ {
580			a, b, c, d := cpuid(0x80000002 + i)
581			v = append(v, a, b, c, d)
582		}
583		return strings.Trim(string(valAsString(v...)), " ")
584	}
585	return "unknown"
586}
587
588func threadsPerCore() int {
589	mfi := maxFunctionID()
590	if mfi < 0x4 || vendorID() != Intel {
591		return 1
592	}
593
594	if mfi < 0xb {
595		_, b, _, d := cpuid(1)
596		if (d & (1 << 28)) != 0 {
597			// v will contain logical core count
598			v := (b >> 16) & 255
599			if v > 1 {
600				a4, _, _, _ := cpuid(4)
601				// physical cores
602				v2 := (a4 >> 26) + 1
603				if v2 > 0 {
604					return int(v) / int(v2)
605				}
606			}
607		}
608		return 1
609	}
610	_, b, _, _ := cpuidex(0xb, 0)
611	if b&0xffff == 0 {
612		return 1
613	}
614	return int(b & 0xffff)
615}
616
617func logicalCores() int {
618	mfi := maxFunctionID()
619	switch vendorID() {
620	case Intel:
621		// Use this on old Intel processors
622		if mfi < 0xb {
623			if mfi < 1 {
624				return 0
625			}
626			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
627			// that can be assigned to logical processors in a physical package.
628			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
629			_, ebx, _, _ := cpuid(1)
630			logical := (ebx >> 16) & 0xff
631			return int(logical)
632		}
633		_, b, _, _ := cpuidex(0xb, 1)
634		return int(b & 0xffff)
635	case AMD, Hygon:
636		_, b, _, _ := cpuid(1)
637		return int((b >> 16) & 0xff)
638	default:
639		return 0
640	}
641}
642
643func familyModel() (int, int) {
644	if maxFunctionID() < 0x1 {
645		return 0, 0
646	}
647	eax, _, _, _ := cpuid(1)
648	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
649	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
650	return int(family), int(model)
651}
652
653func physicalCores() int {
654	switch vendorID() {
655	case Intel:
656		return logicalCores() / threadsPerCore()
657	case AMD, Hygon:
658		if maxExtendedFunction() >= 0x80000008 {
659			_, _, c, _ := cpuid(0x80000008)
660			return int(c&0xff) + 1
661		}
662	}
663	return 0
664}
665
// vendorMapping translates the 12-character CPUID vendor string into a
// Vendor value; vendorID returns Other for strings that are not listed.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
}
683
684func vendorID() Vendor {
685	_, b, c, d := cpuid(0)
686	v := valAsString(b, d, c)
687	vend, ok := vendorMapping[string(v)]
688	if !ok {
689		return Other
690	}
691	return vend
692}
693
694func cacheLine() int {
695	if maxFunctionID() < 0x1 {
696		return 0
697	}
698
699	_, ebx, _, _ := cpuid(1)
700	cache := (ebx & 0xff00) >> 5 // cflush size
701	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
702		_, _, ecx, _ := cpuid(0x80000006)
703		cache = ecx & 0xff // cacheline size
704	}
705	// TODO: Read from Cache and TLB Information
706	return int(cache)
707}
708
709func (c *CPUInfo) cacheSize() {
710	c.Cache.L1D = -1
711	c.Cache.L1I = -1
712	c.Cache.L2 = -1
713	c.Cache.L3 = -1
714	vendor := vendorID()
715	switch vendor {
716	case Intel:
717		if maxFunctionID() < 4 {
718			return
719		}
720		for i := uint32(0); ; i++ {
721			eax, ebx, ecx, _ := cpuidex(4, i)
722			cacheType := eax & 15
723			if cacheType == 0 {
724				break
725			}
726			cacheLevel := (eax >> 5) & 7
727			coherency := int(ebx&0xfff) + 1
728			partitions := int((ebx>>12)&0x3ff) + 1
729			associativity := int((ebx>>22)&0x3ff) + 1
730			sets := int(ecx) + 1
731			size := associativity * partitions * coherency * sets
732			switch cacheLevel {
733			case 1:
734				if cacheType == 1 {
735					// 1 = Data Cache
736					c.Cache.L1D = size
737				} else if cacheType == 2 {
738					// 2 = Instruction Cache
739					c.Cache.L1I = size
740				} else {
741					if c.Cache.L1D < 0 {
742						c.Cache.L1I = size
743					}
744					if c.Cache.L1I < 0 {
745						c.Cache.L1I = size
746					}
747				}
748			case 2:
749				c.Cache.L2 = size
750			case 3:
751				c.Cache.L3 = size
752			}
753		}
754	case AMD, Hygon:
755		// Untested.
756		if maxExtendedFunction() < 0x80000005 {
757			return
758		}
759		_, _, ecx, edx := cpuid(0x80000005)
760		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
761		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
762
763		if maxExtendedFunction() < 0x80000006 {
764			return
765		}
766		_, _, ecx, _ = cpuid(0x80000006)
767		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
768	}
769
770	return
771}
772
// SGXSupport describes the Software Guard Extensions capabilities as filled
// in by hasSGX from CPUID leaf 0x12, sub-leaf 0. When Available is false the
// remaining fields keep their zero values.
type SGXSupport struct {
	Available           bool  // The SGX feature flag was detected
	SGX1Supported       bool  // EAX bit 0 of leaf 0x12
	SGX2Supported       bool  // EAX bit 1 of leaf 0x12
	MaxEnclaveSizeNot64 int64 // 1 << EDX[7:0] of leaf 0x12
	MaxEnclaveSize64    int64 // 1 << EDX[15:8] of leaf 0x12
}
780
781func hasSGX(available bool) (rval SGXSupport) {
782	rval.Available = available
783
784	if !available {
785		return
786	}
787
788	a, _, _, d := cpuidex(0x12, 0)
789	rval.SGX1Supported = a&0x01 != 0
790	rval.SGX2Supported = a&0x02 != 0
791	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
792	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
793
794	return
795}
796
// support queries the CPU and returns the set of detected feature flags.
//
// It returns 0 when CPUID leaf 1 is not available. Otherwise flags are
// gathered from, in order:
//   - leaf 1 (ECX/EDX): basic instruction set flags, HTT and AVX/FMA3,
//   - leaf 7, sub-leaf 0 (if supported): AVX2, BMI, SGX, TSX, AVX-512, ...
//   - extended leaf 0x80000001 (if supported): AMD flags, NX, RDTSCP and the
//     SSE2SLOW/SSE3SLOW/ATOM performance indicators.
//
// AVX, and everything depending on it (FMA3, AVX2, XOP, FMA4), is only
// reported when XGETBV shows that the OS has enabled the XMM and YMM state.
// AVX-512 flags additionally require the opmask and ZMM state to be enabled.
func support() Flags {
	mfi := maxFunctionID()
	vend := vendorID()
	if mfi < 0x1 {
		return 0
	}
	rval := uint64(0)
	// Leaf 1: c = ECX, d = EDX feature bits.
	_, _, c, d := cpuid(1)
	if (d & (1 << 15)) != 0 {
		rval |= CMOV
	}
	if (d & (1 << 23)) != 0 {
		rval |= MMX
	}
	// EDX bit 25 is tested twice on purpose: it sets both MMXEXT
	// ("SSE integer functions or AMD MMX ext") and SSE.
	if (d & (1 << 25)) != 0 {
		rval |= MMXEXT
	}
	if (d & (1 << 25)) != 0 {
		rval |= SSE
	}
	if (d & (1 << 26)) != 0 {
		rval |= SSE2
	}
	if (c & 1) != 0 {
		rval |= SSE3
	}
	if (c & 0x00000200) != 0 {
		rval |= SSSE3
	}
	if (c & 0x00080000) != 0 {
		rval |= SSE4
	}
	if (c & 0x00100000) != 0 {
		rval |= SSE42
	}
	if (c & (1 << 25)) != 0 {
		rval |= AESNI
	}
	if (c & (1 << 1)) != 0 {
		rval |= CLMUL
	}
	if c&(1<<23) != 0 {
		rval |= POPCNT
	}
	if c&(1<<30) != 0 {
		rval |= RDRAND
	}
	if c&(1<<29) != 0 {
		rval |= F16C
	}
	if c&(1<<13) != 0 {
		rval |= CX16
	}
	// HTT is only reported for Intel CPUs that advertise the HTT bit and
	// actually run more than one thread per core.
	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
		if threadsPerCore() > 1 {
			rval |= HTT
		}
	}

	// Check the XSAVE (bit 26), OSXSAVE (bit 27) and AVX (bit 28) bits
	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
		// Check for OS support: XCR0 bits 1 and 2 must both be set.
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			rval |= AVX
			if (c & 0x00001000) != 0 {
				rval |= FMA3
			}
		}
	}

	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		_, ebx, ecx, edx := cpuidex(7, 0)
		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
			rval |= AVX2
		}
		// BMI2 is only reported together with BMI1.
		if (ebx & 0x00000008) != 0 {
			rval |= BMI1
			if (ebx & 0x00000100) != 0 {
				rval |= BMI2
			}
		}
		if ebx&(1<<2) != 0 {
			rval |= SGX
		}
		if ebx&(1<<4) != 0 {
			rval |= HLE
		}
		if ebx&(1<<9) != 0 {
			rval |= ERMS
		}
		if ebx&(1<<11) != 0 {
			rval |= RTM
		}
		if ebx&(1<<14) != 0 {
			rval |= MPX
		}
		if ebx&(1<<18) != 0 {
			rval |= RDSEED
		}
		if ebx&(1<<19) != 0 {
			rval |= ADX
		}
		if ebx&(1<<29) != 0 {
			rval |= SHA
		}
		if edx&(1<<26) != 0 {
			rval |= IBPB
		}
		if edx&(1<<27) != 0 {
			rval |= STIBP
		}

		// Only detect AVX-512 features if XGETBV is supported
		// (leaf 1 ECX bits 26 and 27 both set).
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)

			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
				if ebx&(1<<16) != 0 {
					rval |= AVX512F
				}
				if ebx&(1<<17) != 0 {
					rval |= AVX512DQ
				}
				if ebx&(1<<21) != 0 {
					rval |= AVX512IFMA
				}
				if ebx&(1<<26) != 0 {
					rval |= AVX512PF
				}
				if ebx&(1<<27) != 0 {
					rval |= AVX512ER
				}
				if ebx&(1<<28) != 0 {
					rval |= AVX512CD
				}
				if ebx&(1<<30) != 0 {
					rval |= AVX512BW
				}
				if ebx&(1<<31) != 0 {
					rval |= AVX512VL
				}
				// ecx
				if ecx&(1<<1) != 0 {
					rval |= AVX512VBMI
				}
			}
		}
	}

	// Extended leaf 0x80000001; c and d shadow the leaf 1 registers from
	// here on.
	if maxExtendedFunction() >= 0x80000001 {
		_, _, c, d := cpuid(0x80000001)
		// The LZCNT bit also sets POPCNT.
		if (c & (1 << 5)) != 0 {
			rval |= LZCNT
			rval |= POPCNT
		}
		if (d & (1 << 31)) != 0 {
			rval |= AMD3DNOW
		}
		if (d & (1 << 30)) != 0 {
			rval |= AMD3DNOWEXT
		}
		if (d & (1 << 23)) != 0 {
			rval |= MMX
		}
		if (d & (1 << 22)) != 0 {
			rval |= MMXEXT
		}
		if (c & (1 << 6)) != 0 {
			rval |= SSE4A
		}
		if d&(1<<20) != 0 {
			rval |= NX
		}
		if d&(1<<27) != 0 {
			rval |= RDTSCP
		}

		/* Allow for selectively disabling SSE2 functions on AMD processors
		   with SSE2 support but not SSE4a. This includes Athlon64, some
		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
		   than SSE2 often enough to utilize this special-case flag.
		   SSE2 and SSE2SLOW are both set in this case so that SSE2 is used
		   unless explicitly disabled by checking SSE2SLOW.
		   Note that the test below applies to every non-Intel vendor. */
		if vendorID() != Intel &&
			rval&SSE2 != 0 && (c&0x00000040) == 0 {
			rval |= SSE2SLOW
		}

		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if (rval & AVX) != 0 {
			if (c & 0x00000800) != 0 {
				rval |= XOP
			}
			if (c & 0x00010000) != 0 {
				rval |= FMA4
			}
		}

		if vendorID() == Intel {
			family, model := familyModel()
			if family == 6 && (model == 9 || model == 13 || model == 14) {
				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
				 * usually slower than mmx. */
				if (rval & SSE2) != 0 {
					rval |= SSE2SLOW
				}
				if (rval & SSE3) != 0 {
					rval |= SSE3SLOW
				}
			}
			/* The Atom processor has SSSE3 support, which is useful in many cases,
			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
			 * on the Atom, but is generally faster on other processors supporting
			 * SSSE3. This flag allows for selectively disabling certain SSSE3
			 * functions on the Atom. */
			if family == 6 && model == 28 {
				rval |= ATOM
			}
		}
	}
	return Flags(rval)
}
1028
// valAsString unpacks each value into four bytes, least significant byte
// first, and returns the concatenation. The result is cut immediately before
// the first zero byte, if there is one.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for idx, word := range values {
		base := idx * 4
		// Store the complete word before looking for a terminator.
		for k := 0; k < 4; k++ {
			out[base+k] = byte(word >> (8 * uint(k)))
		}
		for k := 0; k < 4; k++ {
			if out[base+k] == 0 {
				return out[:base+k]
			}
		}
	}
	return out
}
1050