1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import "strings"
14
// Vendor identifies the CPU (or hypervisor) vendor reported through CPUID.
// The zero value is Other, which is also what vendorID returns for vendor
// strings that are not present in vendorMapping.
type Vendor int
17
// Known vendors. vendorID translates the CPUID vendor string into one of
// these values by looking it up in vendorMapping.
const (
	Other Vendor = iota // Vendor string was not recognized
	Intel
	AMD
	VIA
	Transmeta
	NSC  // National Semiconductor
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
)
30
// Feature flags. Each constant occupies its own bit (1 << iota), so flags can
// be combined with | and tested against CPUInfo.Features with &.
// The order of the entries defines the bit values and must not be changed.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions
	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP                   // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
87
// flagNames maps every single-bit feature flag to its printable name.
// Flags.Strings looks names up here, one bit at a time.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions
	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
145
// CPUInfo contains information about the detected system CPU.
// All fields are filled in by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details, as returned by hasSGX
	maxFunc   uint32     // Value returned by maxFunctionID (EAX of CPUID leaf 0)
	maxExFunc uint32     // Value returned by maxExtendedFunction (EAX of CPUID leaf 0x80000000)
}
167
// Low-level CPU access hooks. They have no value at declaration time;
// presumably initCPU installs the platform implementations (not visible in
// this file) — TODO confirm.
var (
	// cpuid is called with a CPUID leaf number and yields the four result registers.
	cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
	// cpuidex is like cpuid, with an additional sub-leaf argument (op2).
	cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
	// xgetbv is called with an extended control register index.
	xgetbv func(index uint32) (eax, edx uint32)
	// rdtscpAsm backs RTCounter (eax/edx) and Ia32TscAux (ecx).
	rdtscpAsm func() (eax, ebx, ecx, edx uint32)
)
172
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data. The fields are filled in
// once by Detect, so reading them does not re-run detection.
var CPU CPUInfo
179
// init runs initCPU (defined elsewhere; presumably it installs the
// cpuid/xgetbv/rdtscp hooks — confirm) and then Detect, so that the exported
// CPU variable is populated when the package is loaded.
func init() {
	initCPU()
	Detect()
}
184
185// Detect will re-detect current CPU info.
186// This will replace the content of the exported CPU variable.
187//
188// Unless you expect the CPU to change while you are running your program
189// you should not need to call this function.
190// If you call this, you must ensure that no other goroutine is accessing the
191// exported CPU variable.
192func Detect() {
193	CPU.maxFunc = maxFunctionID()
194	CPU.maxExFunc = maxExtendedFunction()
195	CPU.BrandName = brandName()
196	CPU.CacheLine = cacheLine()
197	CPU.Family, CPU.Model = familyModel()
198	CPU.Features = support()
199	CPU.SGX = hasSGX(CPU.Features&SGX != 0)
200	CPU.ThreadsPerCore = threadsPerCore()
201	CPU.LogicalCores = logicalCores()
202	CPU.PhysicalCores = physicalCores()
203	CPU.VendorID = vendorID()
204	CPU.cacheSize()
205}
206
207// Generated here: http://play.golang.org/p/BxFH2Gdc0G
208
209// Cmov indicates support of CMOV instructions
210func (c CPUInfo) Cmov() bool {
211	return c.Features&CMOV != 0
212}
213
214// Amd3dnow indicates support of AMD 3DNOW! instructions
215func (c CPUInfo) Amd3dnow() bool {
216	return c.Features&AMD3DNOW != 0
217}
218
219// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
220func (c CPUInfo) Amd3dnowExt() bool {
221	return c.Features&AMD3DNOWEXT != 0
222}
223
224// MMX indicates support of MMX instructions
225func (c CPUInfo) MMX() bool {
226	return c.Features&MMX != 0
227}
228
229// MMXExt indicates support of MMXEXT instructions
230// (SSE integer functions or AMD MMX ext)
231func (c CPUInfo) MMXExt() bool {
232	return c.Features&MMXEXT != 0
233}
234
235// SSE indicates support of SSE instructions
236func (c CPUInfo) SSE() bool {
237	return c.Features&SSE != 0
238}
239
240// SSE2 indicates support of SSE 2 instructions
241func (c CPUInfo) SSE2() bool {
242	return c.Features&SSE2 != 0
243}
244
245// SSE3 indicates support of SSE 3 instructions
246func (c CPUInfo) SSE3() bool {
247	return c.Features&SSE3 != 0
248}
249
250// SSSE3 indicates support of SSSE 3 instructions
251func (c CPUInfo) SSSE3() bool {
252	return c.Features&SSSE3 != 0
253}
254
255// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
256func (c CPUInfo) SSE4() bool {
257	return c.Features&SSE4 != 0
258}
259
260// SSE42 indicates support of SSE4.2 instructions
261func (c CPUInfo) SSE42() bool {
262	return c.Features&SSE42 != 0
263}
264
265// AVX indicates support of AVX instructions
266// and operating system support of AVX instructions
267func (c CPUInfo) AVX() bool {
268	return c.Features&AVX != 0
269}
270
271// AVX2 indicates support of AVX2 instructions
272func (c CPUInfo) AVX2() bool {
273	return c.Features&AVX2 != 0
274}
275
276// FMA3 indicates support of FMA3 instructions
277func (c CPUInfo) FMA3() bool {
278	return c.Features&FMA3 != 0
279}
280
281// FMA4 indicates support of FMA4 instructions
282func (c CPUInfo) FMA4() bool {
283	return c.Features&FMA4 != 0
284}
285
286// XOP indicates support of XOP instructions
287func (c CPUInfo) XOP() bool {
288	return c.Features&XOP != 0
289}
290
291// F16C indicates support of F16C instructions
292func (c CPUInfo) F16C() bool {
293	return c.Features&F16C != 0
294}
295
296// BMI1 indicates support of BMI1 instructions
297func (c CPUInfo) BMI1() bool {
298	return c.Features&BMI1 != 0
299}
300
301// BMI2 indicates support of BMI2 instructions
302func (c CPUInfo) BMI2() bool {
303	return c.Features&BMI2 != 0
304}
305
306// TBM indicates support of TBM instructions
307// (AMD Trailing Bit Manipulation)
308func (c CPUInfo) TBM() bool {
309	return c.Features&TBM != 0
310}
311
312// Lzcnt indicates support of LZCNT instruction
313func (c CPUInfo) Lzcnt() bool {
314	return c.Features&LZCNT != 0
315}
316
317// Popcnt indicates support of POPCNT instruction
318func (c CPUInfo) Popcnt() bool {
319	return c.Features&POPCNT != 0
320}
321
322// HTT indicates the processor has Hyperthreading enabled
323func (c CPUInfo) HTT() bool {
324	return c.Features&HTT != 0
325}
326
327// SSE2Slow indicates that SSE2 may be slow on this processor
328func (c CPUInfo) SSE2Slow() bool {
329	return c.Features&SSE2SLOW != 0
330}
331
332// SSE3Slow indicates that SSE3 may be slow on this processor
333func (c CPUInfo) SSE3Slow() bool {
334	return c.Features&SSE3SLOW != 0
335}
336
337// AesNi indicates support of AES-NI instructions
338// (Advanced Encryption Standard New Instructions)
339func (c CPUInfo) AesNi() bool {
340	return c.Features&AESNI != 0
341}
342
343// Clmul indicates support of CLMUL instructions
344// (Carry-less Multiplication)
345func (c CPUInfo) Clmul() bool {
346	return c.Features&CLMUL != 0
347}
348
349// NX indicates support of NX (No-Execute) bit
350func (c CPUInfo) NX() bool {
351	return c.Features&NX != 0
352}
353
354// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
355func (c CPUInfo) SSE4A() bool {
356	return c.Features&SSE4A != 0
357}
358
359// HLE indicates support of Hardware Lock Elision
360func (c CPUInfo) HLE() bool {
361	return c.Features&HLE != 0
362}
363
364// RTM indicates support of Restricted Transactional Memory
365func (c CPUInfo) RTM() bool {
366	return c.Features&RTM != 0
367}
368
369// Rdrand indicates support of RDRAND instruction is available
370func (c CPUInfo) Rdrand() bool {
371	return c.Features&RDRAND != 0
372}
373
374// Rdseed indicates support of RDSEED instruction is available
375func (c CPUInfo) Rdseed() bool {
376	return c.Features&RDSEED != 0
377}
378
379// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
380func (c CPUInfo) ADX() bool {
381	return c.Features&ADX != 0
382}
383
384// SHA indicates support of Intel SHA Extensions
385func (c CPUInfo) SHA() bool {
386	return c.Features&SHA != 0
387}
388
389// AVX512F indicates support of AVX-512 Foundation
390func (c CPUInfo) AVX512F() bool {
391	return c.Features&AVX512F != 0
392}
393
394// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
395func (c CPUInfo) AVX512DQ() bool {
396	return c.Features&AVX512DQ != 0
397}
398
399// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
400func (c CPUInfo) AVX512IFMA() bool {
401	return c.Features&AVX512IFMA != 0
402}
403
404// AVX512PF indicates support of AVX-512 Prefetch Instructions
405func (c CPUInfo) AVX512PF() bool {
406	return c.Features&AVX512PF != 0
407}
408
409// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
410func (c CPUInfo) AVX512ER() bool {
411	return c.Features&AVX512ER != 0
412}
413
414// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
415func (c CPUInfo) AVX512CD() bool {
416	return c.Features&AVX512CD != 0
417}
418
419// AVX512BW indicates support of AVX-512 Byte and Word Instructions
420func (c CPUInfo) AVX512BW() bool {
421	return c.Features&AVX512BW != 0
422}
423
424// AVX512VL indicates support of AVX-512 Vector Length Extensions
425func (c CPUInfo) AVX512VL() bool {
426	return c.Features&AVX512VL != 0
427}
428
429// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
430func (c CPUInfo) AVX512VBMI() bool {
431	return c.Features&AVX512VBMI != 0
432}
433
434// MPX indicates support of Intel MPX (Memory Protection Extensions)
435func (c CPUInfo) MPX() bool {
436	return c.Features&MPX != 0
437}
438
439// ERMS indicates support of Enhanced REP MOVSB/STOSB
440func (c CPUInfo) ERMS() bool {
441	return c.Features&ERMS != 0
442}
443
444// RDTSCP Instruction is available.
445func (c CPUInfo) RDTSCP() bool {
446	return c.Features&RDTSCP != 0
447}
448
449// CX16 indicates if CMPXCHG16B instruction is available.
450func (c CPUInfo) CX16() bool {
451	return c.Features&CX16 != 0
452}
453
454// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
455// So TSX simply checks that.
456func (c CPUInfo) TSX() bool {
457	return c.Features&(HLE|RTM) == HLE|RTM
458}
459
460// Atom indicates an Atom processor
461func (c CPUInfo) Atom() bool {
462	return c.Features&ATOM != 0
463}
464
465// Intel returns true if vendor is recognized as Intel
466func (c CPUInfo) Intel() bool {
467	return c.VendorID == Intel
468}
469
470// AMD returns true if vendor is recognized as AMD
471func (c CPUInfo) AMD() bool {
472	return c.VendorID == AMD
473}
474
475// Transmeta returns true if vendor is recognized as Transmeta
476func (c CPUInfo) Transmeta() bool {
477	return c.VendorID == Transmeta
478}
479
480// NSC returns true if vendor is recognized as National Semiconductor
481func (c CPUInfo) NSC() bool {
482	return c.VendorID == NSC
483}
484
485// VIA returns true if vendor is recognized as VIA
486func (c CPUInfo) VIA() bool {
487	return c.VendorID == VIA
488}
489
490// RTCounter returns the 64-bit time-stamp counter
491// Uses the RDTSCP instruction. The value 0 is returned
492// if the CPU does not support the instruction.
493func (c CPUInfo) RTCounter() uint64 {
494	if !c.RDTSCP() {
495		return 0
496	}
497	a, _, _, d := rdtscpAsm()
498	return uint64(a) | (uint64(d) << 32)
499}
500
501// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
502// This variable is OS dependent, but on Linux contains information
503// about the current cpu/core the code is running on.
504// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
505func (c CPUInfo) Ia32TscAux() uint32 {
506	if !c.RDTSCP() {
507		return 0
508	}
509	_, _, ecx, _ := rdtscpAsm()
510	return ecx
511}
512
513// LogicalCPU will return the Logical CPU the code is currently executing on.
514// This is likely to change when the OS re-schedules the running thread
515// to another CPU.
516// If the current core cannot be detected, -1 will be returned.
517func (c CPUInfo) LogicalCPU() int {
518	if c.maxFunc < 1 {
519		return -1
520	}
521	_, ebx, _, _ := cpuid(1)
522	return int(ebx >> 24)
523}
524
525// VM Will return true if the cpu id indicates we are in
526// a virtual machine. This is only a hint, and will very likely
527// have many false negatives.
528func (c CPUInfo) VM() bool {
529	switch c.VendorID {
530	case MSVM, KVM, VMware, XenHVM:
531		return true
532	}
533	return false
534}
535
// Flags contains detected CPU features and characteristics.
// It is a bit set: each feature constant (CMOV, SSE, ...) occupies one bit.
type Flags uint64
538
539// String returns a string representation of the detected
540// CPU features.
541func (f Flags) String() string {
542	return strings.Join(f.Strings(), ",")
543}
544
545// Strings returns and array of the detected features.
546func (f Flags) Strings() []string {
547	s := support()
548	r := make([]string, 0, 20)
549	for i := uint(0); i < 64; i++ {
550		key := Flags(1 << i)
551		val := flagNames[key]
552		if s&key != 0 {
553			r = append(r, val)
554		}
555	}
556	return r
557}
558
559func maxExtendedFunction() uint32 {
560	eax, _, _, _ := cpuid(0x80000000)
561	return eax
562}
563
564func maxFunctionID() uint32 {
565	a, _, _, _ := cpuid(0)
566	return a
567}
568
569func brandName() string {
570	if maxExtendedFunction() >= 0x80000004 {
571		v := make([]uint32, 0, 48)
572		for i := uint32(0); i < 3; i++ {
573			a, b, c, d := cpuid(0x80000002 + i)
574			v = append(v, a, b, c, d)
575		}
576		return strings.Trim(string(valAsString(v...)), " ")
577	}
578	return "unknown"
579}
580
581func threadsPerCore() int {
582	mfi := maxFunctionID()
583	if mfi < 0x4 || vendorID() != Intel {
584		return 1
585	}
586
587	if mfi < 0xb {
588		_, b, _, d := cpuid(1)
589		if (d & (1 << 28)) != 0 {
590			// v will contain logical core count
591			v := (b >> 16) & 255
592			if v > 1 {
593				a4, _, _, _ := cpuid(4)
594				// physical cores
595				v2 := (a4 >> 26) + 1
596				if v2 > 0 {
597					return int(v) / int(v2)
598				}
599			}
600		}
601		return 1
602	}
603	_, b, _, _ := cpuidex(0xb, 0)
604	if b&0xffff == 0 {
605		return 1
606	}
607	return int(b & 0xffff)
608}
609
610func logicalCores() int {
611	mfi := maxFunctionID()
612	switch vendorID() {
613	case Intel:
614		// Use this on old Intel processors
615		if mfi < 0xb {
616			if mfi < 1 {
617				return 0
618			}
619			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
620			// that can be assigned to logical processors in a physical package.
621			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
622			_, ebx, _, _ := cpuid(1)
623			logical := (ebx >> 16) & 0xff
624			return int(logical)
625		}
626		_, b, _, _ := cpuidex(0xb, 1)
627		return int(b & 0xffff)
628	case AMD:
629		_, b, _, _ := cpuid(1)
630		return int((b >> 16) & 0xff)
631	default:
632		return 0
633	}
634}
635
636func familyModel() (int, int) {
637	if maxFunctionID() < 0x1 {
638		return 0, 0
639	}
640	eax, _, _, _ := cpuid(1)
641	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
642	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
643	return int(family), int(model)
644}
645
646func physicalCores() int {
647	switch vendorID() {
648	case Intel:
649		return logicalCores() / threadsPerCore()
650	case AMD:
651		if maxExtendedFunction() >= 0x80000008 {
652			_, _, c, _ := cpuid(0x80000008)
653			return int(c&0xff) + 1
654		}
655	}
656	return 0
657}
658
// vendorMapping translates the 12-character CPUID vendor string into a Vendor.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
}
674
675func vendorID() Vendor {
676	_, b, c, d := cpuid(0)
677	v := valAsString(b, d, c)
678	vend, ok := vendorMapping[string(v)]
679	if !ok {
680		return Other
681	}
682	return vend
683}
684
685func cacheLine() int {
686	if maxFunctionID() < 0x1 {
687		return 0
688	}
689
690	_, ebx, _, _ := cpuid(1)
691	cache := (ebx & 0xff00) >> 5 // cflush size
692	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
693		_, _, ecx, _ := cpuid(0x80000006)
694		cache = ecx & 0xff // cacheline size
695	}
696	// TODO: Read from Cache and TLB Information
697	return int(cache)
698}
699
700func (c *CPUInfo) cacheSize() {
701	c.Cache.L1D = -1
702	c.Cache.L1I = -1
703	c.Cache.L2 = -1
704	c.Cache.L3 = -1
705	vendor := vendorID()
706	switch vendor {
707	case Intel:
708		if maxFunctionID() < 4 {
709			return
710		}
711		for i := uint32(0); ; i++ {
712			eax, ebx, ecx, _ := cpuidex(4, i)
713			cacheType := eax & 15
714			if cacheType == 0 {
715				break
716			}
717			cacheLevel := (eax >> 5) & 7
718			coherency := int(ebx&0xfff) + 1
719			partitions := int((ebx>>12)&0x3ff) + 1
720			associativity := int((ebx>>22)&0x3ff) + 1
721			sets := int(ecx) + 1
722			size := associativity * partitions * coherency * sets
723			switch cacheLevel {
724			case 1:
725				if cacheType == 1 {
726					// 1 = Data Cache
727					c.Cache.L1D = size
728				} else if cacheType == 2 {
729					// 2 = Instruction Cache
730					c.Cache.L1I = size
731				} else {
732					if c.Cache.L1D < 0 {
733						c.Cache.L1I = size
734					}
735					if c.Cache.L1I < 0 {
736						c.Cache.L1I = size
737					}
738				}
739			case 2:
740				c.Cache.L2 = size
741			case 3:
742				c.Cache.L3 = size
743			}
744		}
745	case AMD:
746		// Untested.
747		if maxExtendedFunction() < 0x80000005 {
748			return
749		}
750		_, _, ecx, edx := cpuid(0x80000005)
751		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
752		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
753
754		if maxExtendedFunction() < 0x80000006 {
755			return
756		}
757		_, _, ecx, _ = cpuid(0x80000006)
758		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
759	}
760
761	return
762}
763
// SGXSupport describes the Software Guard Extensions support, as filled in
// by hasSGX from CPUID leaf 0x12, sub-leaf 0. When Available is false, all
// other fields are left at their zero values.
type SGXSupport struct {
	Available           bool  // The SGX feature flag was detected
	SGX1Supported       bool  // Bit 0 of EAX is set
	SGX2Supported       bool  // Bit 1 of EAX is set
	MaxEnclaveSizeNot64 int64 // 1 << EDX[7:0]; presumably the limit outside 64-bit mode — confirm
	MaxEnclaveSize64    int64 // 1 << EDX[15:8]; presumably the limit in 64-bit mode — confirm
}
771
772func hasSGX(available bool) (rval SGXSupport) {
773	rval.Available = available
774
775	if !available {
776		return
777	}
778
779	a, _, _, d := cpuidex(0x12, 0)
780	rval.SGX1Supported = a&0x01 != 0
781	rval.SGX2Supported = a&0x02 != 0
782	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
783	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
784
785	return
786}
787
788func support() Flags {
789	mfi := maxFunctionID()
790	vend := vendorID()
791	if mfi < 0x1 {
792		return 0
793	}
794	rval := uint64(0)
795	_, _, c, d := cpuid(1)
796	if (d & (1 << 15)) != 0 {
797		rval |= CMOV
798	}
799	if (d & (1 << 23)) != 0 {
800		rval |= MMX
801	}
802	if (d & (1 << 25)) != 0 {
803		rval |= MMXEXT
804	}
805	if (d & (1 << 25)) != 0 {
806		rval |= SSE
807	}
808	if (d & (1 << 26)) != 0 {
809		rval |= SSE2
810	}
811	if (c & 1) != 0 {
812		rval |= SSE3
813	}
814	if (c & 0x00000200) != 0 {
815		rval |= SSSE3
816	}
817	if (c & 0x00080000) != 0 {
818		rval |= SSE4
819	}
820	if (c & 0x00100000) != 0 {
821		rval |= SSE42
822	}
823	if (c & (1 << 25)) != 0 {
824		rval |= AESNI
825	}
826	if (c & (1 << 1)) != 0 {
827		rval |= CLMUL
828	}
829	if c&(1<<23) != 0 {
830		rval |= POPCNT
831	}
832	if c&(1<<30) != 0 {
833		rval |= RDRAND
834	}
835	if c&(1<<29) != 0 {
836		rval |= F16C
837	}
838	if c&(1<<13) != 0 {
839		rval |= CX16
840	}
841	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
842		if threadsPerCore() > 1 {
843			rval |= HTT
844		}
845	}
846
847	// Check XGETBV, OXSAVE and AVX bits
848	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
849		// Check for OS support
850		eax, _ := xgetbv(0)
851		if (eax & 0x6) == 0x6 {
852			rval |= AVX
853			if (c & 0x00001000) != 0 {
854				rval |= FMA3
855			}
856		}
857	}
858
859	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
860	if mfi >= 7 {
861		_, ebx, ecx, edx := cpuidex(7, 0)
862		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
863			rval |= AVX2
864		}
865		if (ebx & 0x00000008) != 0 {
866			rval |= BMI1
867			if (ebx & 0x00000100) != 0 {
868				rval |= BMI2
869			}
870		}
871		if ebx&(1<<2) != 0 {
872			rval |= SGX
873		}
874		if ebx&(1<<4) != 0 {
875			rval |= HLE
876		}
877		if ebx&(1<<9) != 0 {
878			rval |= ERMS
879		}
880		if ebx&(1<<11) != 0 {
881			rval |= RTM
882		}
883		if ebx&(1<<14) != 0 {
884			rval |= MPX
885		}
886		if ebx&(1<<18) != 0 {
887			rval |= RDSEED
888		}
889		if ebx&(1<<19) != 0 {
890			rval |= ADX
891		}
892		if ebx&(1<<29) != 0 {
893			rval |= SHA
894		}
895		if edx&(1<<26) != 0 {
896			rval |= IBPB
897		}
898		if edx&(1<<27) != 0 {
899			rval |= STIBP
900		}
901
902		// Only detect AVX-512 features if XGETBV is supported
903		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
904			// Check for OS support
905			eax, _ := xgetbv(0)
906
907			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
908			// ZMM16-ZMM31 state are enabled by OS)
909			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
910			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
911				if ebx&(1<<16) != 0 {
912					rval |= AVX512F
913				}
914				if ebx&(1<<17) != 0 {
915					rval |= AVX512DQ
916				}
917				if ebx&(1<<21) != 0 {
918					rval |= AVX512IFMA
919				}
920				if ebx&(1<<26) != 0 {
921					rval |= AVX512PF
922				}
923				if ebx&(1<<27) != 0 {
924					rval |= AVX512ER
925				}
926				if ebx&(1<<28) != 0 {
927					rval |= AVX512CD
928				}
929				if ebx&(1<<30) != 0 {
930					rval |= AVX512BW
931				}
932				if ebx&(1<<31) != 0 {
933					rval |= AVX512VL
934				}
935				// ecx
936				if ecx&(1<<1) != 0 {
937					rval |= AVX512VBMI
938				}
939			}
940		}
941	}
942
943	if maxExtendedFunction() >= 0x80000001 {
944		_, _, c, d := cpuid(0x80000001)
945		if (c & (1 << 5)) != 0 {
946			rval |= LZCNT
947			rval |= POPCNT
948		}
949		if (d & (1 << 31)) != 0 {
950			rval |= AMD3DNOW
951		}
952		if (d & (1 << 30)) != 0 {
953			rval |= AMD3DNOWEXT
954		}
955		if (d & (1 << 23)) != 0 {
956			rval |= MMX
957		}
958		if (d & (1 << 22)) != 0 {
959			rval |= MMXEXT
960		}
961		if (c & (1 << 6)) != 0 {
962			rval |= SSE4A
963		}
964		if d&(1<<20) != 0 {
965			rval |= NX
966		}
967		if d&(1<<27) != 0 {
968			rval |= RDTSCP
969		}
970
971		/* Allow for selectively disabling SSE2 functions on AMD processors
972		   with SSE2 support but not SSE4a. This includes Athlon64, some
973		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
974		   than SSE2 often enough to utilize this special-case flag.
975		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
976		   so that SSE2 is used unless explicitly disabled by checking
977		   AV_CPU_FLAG_SSE2SLOW. */
978		if vendorID() != Intel &&
979			rval&SSE2 != 0 && (c&0x00000040) == 0 {
980			rval |= SSE2SLOW
981		}
982
983		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
984		 * used unless the OS has AVX support. */
985		if (rval & AVX) != 0 {
986			if (c & 0x00000800) != 0 {
987				rval |= XOP
988			}
989			if (c & 0x00010000) != 0 {
990				rval |= FMA4
991			}
992		}
993
994		if vendorID() == Intel {
995			family, model := familyModel()
996			if family == 6 && (model == 9 || model == 13 || model == 14) {
997				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
998				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
999				 * usually slower than mmx. */
1000				if (rval & SSE2) != 0 {
1001					rval |= SSE2SLOW
1002				}
1003				if (rval & SSE3) != 0 {
1004					rval |= SSE3SLOW
1005				}
1006			}
1007			/* The Atom processor has SSSE3 support, which is useful in many cases,
1008			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
1009			 * on the Atom, but is generally faster on other processors supporting
1010			 * SSSE3. This flag allows for selectively disabling certain SSSE3
1011			 * functions on the Atom. */
1012			if family == 6 && model == 28 {
1013				rval |= ATOM
1014			}
1015		}
1016	}
1017	return Flags(rval)
1018}
1019
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and concatenates them in argument order. The output stops just
// before the first zero byte encountered, so it behaves like a NUL-terminated
// string packed into registers.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, v := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(v >> shift)
			if b == 0 {
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
1041