1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) as well as arm64 is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import (
14	"flag"
15	"fmt"
16	"math"
17	"os"
18	"runtime"
19	"strings"
20)
21
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
23// and Processor Programming Reference (PPR)
24
// Vendor is a representation of a CPU vendor.
// The zero value is VendorUnknown.
type Vendor int

// Known vendor identifiers. Values are assigned by iota in declaration
// order, so inserting or reordering entries changes the numeric value of
// every entry that follows.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is an unexported sentinel that follows the final vendor value.
	lastVendor
)
59
60//go:generate stringer -type=FeatureID,Vendor
61
// FeatureID is the ID of a specific cpu feature.
// Each ID is used as a bit position inside a flagSet.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	// Note: iota counts const specs in this block, and UNKNOWN occupies spec 0,
	// so ADX has the value 1 and the remaining IDs follow sequentially.
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one.
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CPBOOST                             // Core Performance Boost
	CX16                                // CMPXCHG16B Instruction
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	GFNI                                // Galois Field New Instructions
	HLE                                 // Hardware Lock Elision
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instructions supported
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCOMMIT                             // MCOMMIT instruction supported
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	NX                                  // NX (No-Execute) bit
	POPCNT                              // POPCNT instruction
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE                           // Serialize Instruction Execution
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	TBM                                 // AMD Trailing Bit Manipulation
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	XOP                                 // Bulldozer XOP functions

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension

	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lowest ID scanned by ParseFeature and flagSet.Strings.
	firstID FeatureID = UNKNOWN + 1
)
196
// CPUInfo contains information about the detected system CPU.
//
// The feature flags themselves are unexported; query them through methods
// such as Supports, Has and FeatureSet.
type CPUInfo struct {
	BrandName      string  // Brand name reported by the CPU
	VendorID       Vendor  // Comparable CPU vendor ID
	VendorString   string  // Raw vendor string.
	featureSet     flagSet // Features of the CPU
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int     // CPU family number
	Model          int     // CPU model number
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
	// Cache holds the detected cache sizes in bytes.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	// SGX describes Software Guard Extensions support (see SGXSupport).
	SGX       SGXSupport
	// maxFunc is consulted by LogicalCPU to decide whether CPUID leaf 1 may be queried.
	maxFunc   uint32
	// NOTE(review): maxExFunc is presumably the highest extended CPUID leaf; it is populated outside this section — confirm.
	maxExFunc uint32
}
221
// Low-level probing hooks. They are declared as function variables and are
// not assigned here.
// NOTE(review): presumably initCPU installs the platform-specific
// implementations before Detect runs — confirm against the per-arch files.

// cpuid executes CPUID for leaf op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
// cpuidex executes CPUID for leaf op and sub-leaf op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)
// rdtscpAsm backs RTCounter (eax/edx) and Ia32TscAux (ecx).
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// darwinHasAVX512 reports AVX-512 availability on darwin; the default says no.
var darwinHasAVX512 = func() bool { return false }
227
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
233
// init prepares the package at program start: it runs initCPU first and then
// performs an initial Detect so that the exported CPU variable is populated.
func init() {
	initCPU()
	Detect()
}
238
239// Detect will re-detect current CPU info.
240// This will replace the content of the exported CPU variable.
241//
242// Unless you expect the CPU to change while you are running your program
243// you should not need to call this function.
244// If you call this, you must ensure that no other goroutine is accessing the
245// exported CPU variable.
246func Detect() {
247	// Set defaults
248	CPU.ThreadsPerCore = 1
249	CPU.Cache.L1I = -1
250	CPU.Cache.L1D = -1
251	CPU.Cache.L2 = -1
252	CPU.Cache.L3 = -1
253	safe := true
254	if detectArmFlag != nil {
255		safe = !*detectArmFlag
256	}
257	addInfo(&CPU, safe)
258	if displayFeats != nil && *displayFeats {
259		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
260		// Exit with non-zero so tests will print value.
261		os.Exit(1)
262	}
263	if disableFlag != nil {
264		s := strings.Split(*disableFlag, ",")
265		for _, feat := range s {
266			feat := ParseFeature(strings.TrimSpace(feat))
267			if feat != UNKNOWN {
268				CPU.featureSet.unset(feat)
269			}
270		}
271	}
272}
273
// DetectARM will detect ARM64 features.
// This is NOT done automatically since it can potentially crash
// if the OS does not handle the command.
// If in the future this can be done safely this function may not
// do anything.
//
// It re-runs addInfo on the exported CPU variable with the safe flag cleared.
func DetectARM() {
	addInfo(&CPU, false)
}
282
// Command line flag values. They stay nil until Flags registers them, and
// Detect checks each one for nil before dereferencing it.
var detectArmFlag *bool
var displayFeats *bool
var disableFlag *string
286
// Flags will enable flags.
// This must be called *before* flag.Parse AND
// Detect must be called after the flags have been parsed.
// Note that this means that any detection used in init() functions
// will not contain these flags.
//
// The registered flags are cpu.disable, cpu.features and cpu.arm.
func Flags() {
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
}
297
298// Supports returns whether the CPU supports all of the requested features.
299func (c CPUInfo) Supports(ids ...FeatureID) bool {
300	for _, id := range ids {
301		if !c.featureSet.inSet(id) {
302			return false
303		}
304	}
305	return true
306}
307
// Has allows for checking a single feature.
// Should be inlined by the compiler.
// It reports whether id is present in the feature set of c.
func (c CPUInfo) Has(id FeatureID) bool {
	return c.featureSet.inSet(id)
}
313
314// Disable will disable one or several features.
315func (c *CPUInfo) Disable(ids ...FeatureID) bool {
316	for _, id := range ids {
317		c.featureSet.unset(id)
318	}
319	return true
320}
321
322// Enable will disable one or several features even if they were undetected.
323// This is of course not recommended for obvious reasons.
324func (c *CPUInfo) Enable(ids ...FeatureID) bool {
325	for _, id := range ids {
326		c.featureSet.set(id)
327	}
328	return true
329}
330
// IsVendor returns true if the detected CPU vendor is v.
func (c CPUInfo) IsVendor(v Vendor) bool {
	return c.VendorID == v
}
335
336func (c CPUInfo) FeatureSet() []string {
337	s := make([]string, 0)
338	for _, f := range c.featureSet.Strings() {
339		s = append(s, f)
340	}
341	return s
342}
343
344// RTCounter returns the 64-bit time-stamp counter
345// Uses the RDTSCP instruction. The value 0 is returned
346// if the CPU does not support the instruction.
347func (c CPUInfo) RTCounter() uint64 {
348	if !c.Supports(RDTSCP) {
349		return 0
350	}
351	a, _, _, d := rdtscpAsm()
352	return uint64(a) | (uint64(d) << 32)
353}
354
355// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
356// This variable is OS dependent, but on Linux contains information
357// about the current cpu/core the code is running on.
358// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
359func (c CPUInfo) Ia32TscAux() uint32 {
360	if !c.Supports(RDTSCP) {
361		return 0
362	}
363	_, _, ecx, _ := rdtscpAsm()
364	return ecx
365}
366
367// LogicalCPU will return the Logical CPU the code is currently executing on.
368// This is likely to change when the OS re-schedules the running thread
369// to another CPU.
370// If the current core cannot be detected, -1 will be returned.
371func (c CPUInfo) LogicalCPU() int {
372	if c.maxFunc < 1 {
373		return -1
374	}
375	_, ebx, _, _ := cpuid(1)
376	return int(ebx >> 24)
377}
378
379// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
380// supported, use it, otherwise parse the brand string. Yes, really.
381func (c *CPUInfo) frequencies() {
382	c.Hz, c.BoostFreq = 0, 0
383	mfi := maxFunctionID()
384	if mfi >= 0x15 {
385		eax, ebx, ecx, _ := cpuid(0x15)
386		if eax != 0 && ebx != 0 && ecx != 0 {
387			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
388		}
389	}
390	if mfi >= 0x16 {
391		a, b, _, _ := cpuid(0x16)
392		// Base...
393		if a&0xffff > 0 {
394			c.Hz = int64(a&0xffff) * 1_000_000
395		}
396		// Boost...
397		if b&0xffff > 0 {
398			c.BoostFreq = int64(b&0xffff) * 1_000_000
399		}
400	}
401	if c.Hz > 0 {
402		return
403	}
404
405	// computeHz determines the official rated speed of a CPU from its brand
406	// string. This insanity is *actually the official documented way to do
407	// this according to Intel*, prior to leaf 0x15 existing. The official
408	// documentation only shows this working for exactly `x.xx` or `xxxx`
409	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
410	// sizes.
411	model := c.BrandName
412	hz := strings.LastIndex(model, "Hz")
413	if hz < 3 {
414		return
415	}
416	var multiplier int64
417	switch model[hz-1] {
418	case 'M':
419		multiplier = 1000 * 1000
420	case 'G':
421		multiplier = 1000 * 1000 * 1000
422	case 'T':
423		multiplier = 1000 * 1000 * 1000 * 1000
424	}
425	if multiplier == 0 {
426		return
427	}
428	freq := int64(0)
429	divisor := int64(0)
430	decimalShift := int64(1)
431	var i int
432	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
433		if model[i] >= '0' && model[i] <= '9' {
434			freq += int64(model[i]-'0') * decimalShift
435			decimalShift *= 10
436		} else if model[i] == '.' {
437			if divisor != 0 {
438				return
439			}
440			divisor = decimalShift
441		} else {
442			return
443		}
444	}
445	// we didn't find a space
446	if i < 0 {
447		return
448	}
449	if divisor != 0 {
450		c.Hz = (freq * multiplier) / divisor
451		return
452	}
453	c.Hz = freq * multiplier
454}
455
456// VM Will return true if the cpu id indicates we are in
457// a virtual machine.
458func (c CPUInfo) VM() bool {
459	return CPU.featureSet.inSet(HYPERVISOR)
460}
461
// flags contains detected cpu features and characteristics.
// One flags value is a 64-bit word holding one bit per feature.
type flags uint64

// log2(bits_in_uint64)
const flagBitsLog2 = 6

// flagBits is the number of feature bits stored per flags word (64).
const flagBits = 1 << flagBitsLog2

// flagMask extracts the bit position within a word from a FeatureID.
const flagMask = flagBits - 1

// flagSet contains detected cpu features and characteristics in an array of flags.
// The array length is lastID/flagBits rounded up, so every FeatureID below
// lastID has a bit.
type flagSet [(lastID + flagMask) / flagBits]flags
472
473func (s flagSet) inSet(feat FeatureID) bool {
474	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
475}
476
477func (s *flagSet) set(feat FeatureID) {
478	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
479}
480
481// setIf will set a feature if boolean is true.
482func (s *flagSet) setIf(cond bool, features ...FeatureID) {
483	if cond {
484		for _, offset := range features {
485			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
486		}
487	}
488}
489
490func (s *flagSet) unset(offset FeatureID) {
491	bit := flags(1 << (offset & flagMask))
492	s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
493}
494
495// or with another flagset.
496func (s *flagSet) or(other flagSet) {
497	for i, v := range other[:] {
498		s[i] |= v
499	}
500}
501
502// ParseFeature will parse the string and return the ID of the matching feature.
503// Will return UNKNOWN if not found.
504func ParseFeature(s string) FeatureID {
505	s = strings.ToUpper(s)
506	for i := firstID; i < lastID; i++ {
507		if i.String() == s {
508			return i
509		}
510	}
511	return UNKNOWN
512}
513
514// Strings returns an array of the detected features for FlagsSet.
515func (s flagSet) Strings() []string {
516	if len(s) == 0 {
517		return []string{""}
518	}
519	r := make([]string, 0)
520	for i := firstID; i < lastID; i++ {
521		if s.inSet(i) {
522			r = append(r, i.String())
523		}
524	}
525	return r
526}
527
// maxExtendedFunction returns EAX of CPUID leaf 0x80000000, which callers
// compare against extended leaf numbers before querying them.
func maxExtendedFunction() uint32 {
	eax, _, _, _ := cpuid(0x80000000)
	return eax
}
532
// maxFunctionID returns EAX of CPUID leaf 0, which callers compare against
// standard leaf numbers before querying them.
func maxFunctionID() uint32 {
	a, _, _, _ := cpuid(0)
	return a
}
537
538func brandName() string {
539	if maxExtendedFunction() >= 0x80000004 {
540		v := make([]uint32, 0, 48)
541		for i := uint32(0); i < 3; i++ {
542			a, b, c, d := cpuid(0x80000002 + i)
543			v = append(v, a, b, c, d)
544		}
545		return strings.Trim(string(valAsString(v...)), " ")
546	}
547	return "unknown"
548}
549
550func threadsPerCore() int {
551	mfi := maxFunctionID()
552	vend, _ := vendorID()
553
554	if mfi < 0x4 || (vend != Intel && vend != AMD) {
555		return 1
556	}
557
558	if mfi < 0xb {
559		if vend != Intel {
560			return 1
561		}
562		_, b, _, d := cpuid(1)
563		if (d & (1 << 28)) != 0 {
564			// v will contain logical core count
565			v := (b >> 16) & 255
566			if v > 1 {
567				a4, _, _, _ := cpuid(4)
568				// physical cores
569				v2 := (a4 >> 26) + 1
570				if v2 > 0 {
571					return int(v) / int(v2)
572				}
573			}
574		}
575		return 1
576	}
577	_, b, _, _ := cpuidex(0xb, 0)
578	if b&0xffff == 0 {
579		if vend == AMD {
580			// Workaround for AMD returning 0, assume 2 if >= Zen 2
581			// It will be more correct than not.
582			fam, _ := familyModel()
583			_, _, _, d := cpuid(1)
584			if (d&(1<<28)) != 0 && fam >= 23 {
585				return 2
586			}
587		}
588		return 1
589	}
590	return int(b & 0xffff)
591}
592
593func logicalCores() int {
594	mfi := maxFunctionID()
595	v, _ := vendorID()
596	switch v {
597	case Intel:
598		// Use this on old Intel processors
599		if mfi < 0xb {
600			if mfi < 1 {
601				return 0
602			}
603			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
604			// that can be assigned to logical processors in a physical package.
605			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
606			_, ebx, _, _ := cpuid(1)
607			logical := (ebx >> 16) & 0xff
608			return int(logical)
609		}
610		_, b, _, _ := cpuidex(0xb, 1)
611		return int(b & 0xffff)
612	case AMD, Hygon:
613		_, b, _, _ := cpuid(1)
614		return int((b >> 16) & 0xff)
615	default:
616		return 0
617	}
618}
619
620func familyModel() (int, int) {
621	if maxFunctionID() < 0x1 {
622		return 0, 0
623	}
624	eax, _, _, _ := cpuid(1)
625	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
626	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
627	return int(family), int(model)
628}
629
630func physicalCores() int {
631	v, _ := vendorID()
632	switch v {
633	case Intel:
634		return logicalCores() / threadsPerCore()
635	case AMD, Hygon:
636		lc := logicalCores()
637		tpc := threadsPerCore()
638		if lc > 0 && tpc > 0 {
639			return lc / tpc
640		}
641
642		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
643		if maxExtendedFunction() >= 0x80000008 {
644			_, _, c, _ := cpuid(0x80000008)
645			if c&0xff > 0 {
646				return int(c&0xff) + 1
647			}
648		}
649	}
650	return 0
651}
652
// vendorMapping translates the 12-character vendor string returned by
// CPUID leaf 0 into a Vendor value. Strings that are not listed are
// reported as VendorUnknown by vendorID.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
	"Vortex86 SoC": SiS,
	"SiS SiS SiS ": SiS,
	"RiseRiseRise": SiS,
	"Genuine  RDC": RDC,
}
674
675func vendorID() (Vendor, string) {
676	_, b, c, d := cpuid(0)
677	v := string(valAsString(b, d, c))
678	vend, ok := vendorMapping[v]
679	if !ok {
680		return VendorUnknown, v
681	}
682	return vend, v
683}
684
685func cacheLine() int {
686	if maxFunctionID() < 0x1 {
687		return 0
688	}
689
690	_, ebx, _, _ := cpuid(1)
691	cache := (ebx & 0xff00) >> 5 // cflush size
692	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
693		_, _, ecx, _ := cpuid(0x80000006)
694		cache = ecx & 0xff // cacheline size
695	}
696	// TODO: Read from Cache and TLB Information
697	return int(cache)
698}
699
700func (c *CPUInfo) cacheSize() {
701	c.Cache.L1D = -1
702	c.Cache.L1I = -1
703	c.Cache.L2 = -1
704	c.Cache.L3 = -1
705	vendor, _ := vendorID()
706	switch vendor {
707	case Intel:
708		if maxFunctionID() < 4 {
709			return
710		}
711		for i := uint32(0); ; i++ {
712			eax, ebx, ecx, _ := cpuidex(4, i)
713			cacheType := eax & 15
714			if cacheType == 0 {
715				break
716			}
717			cacheLevel := (eax >> 5) & 7
718			coherency := int(ebx&0xfff) + 1
719			partitions := int((ebx>>12)&0x3ff) + 1
720			associativity := int((ebx>>22)&0x3ff) + 1
721			sets := int(ecx) + 1
722			size := associativity * partitions * coherency * sets
723			switch cacheLevel {
724			case 1:
725				if cacheType == 1 {
726					// 1 = Data Cache
727					c.Cache.L1D = size
728				} else if cacheType == 2 {
729					// 2 = Instruction Cache
730					c.Cache.L1I = size
731				} else {
732					if c.Cache.L1D < 0 {
733						c.Cache.L1I = size
734					}
735					if c.Cache.L1I < 0 {
736						c.Cache.L1I = size
737					}
738				}
739			case 2:
740				c.Cache.L2 = size
741			case 3:
742				c.Cache.L3 = size
743			}
744		}
745	case AMD, Hygon:
746		// Untested.
747		if maxExtendedFunction() < 0x80000005 {
748			return
749		}
750		_, _, ecx, edx := cpuid(0x80000005)
751		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
752		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
753
754		if maxExtendedFunction() < 0x80000006 {
755			return
756		}
757		_, _, ecx, _ = cpuid(0x80000006)
758		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
759
760		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
761		if maxExtendedFunction() < 0x8000001D {
762			return
763		}
764		for i := uint32(0); i < math.MaxUint32; i++ {
765			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
766
767			level := (eax >> 5) & 7
768			cacheNumSets := ecx + 1
769			cacheLineSize := 1 + (ebx & 2047)
770			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
771			cacheNumWays := 1 + ((ebx >> 22) & 511)
772
773			typ := eax & 15
774			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
775			if typ == 0 {
776				return
777			}
778
779			switch level {
780			case 1:
781				switch typ {
782				case 1:
783					// Data cache
784					c.Cache.L1D = size
785				case 2:
786					// Inst cache
787					c.Cache.L1I = size
788				default:
789					if c.Cache.L1D < 0 {
790						c.Cache.L1I = size
791					}
792					if c.Cache.L1I < 0 {
793						c.Cache.L1I = size
794					}
795				}
796			case 2:
797				c.Cache.L2 = size
798			case 3:
799				c.Cache.L3 = size
800			}
801		}
802	}
803
804	return
805}
806
// SGXEPCSection describes one EPC section as reported by an EPC section
// sub-leaf of CPUID leaf 0x12 (see hasSGX).
type SGXEPCSection struct {
	// BaseAddress is the section base address assembled from EAX/EBX.
	BaseAddress uint64
	// EPCSize is the section size assembled from ECX/EDX.
	EPCSize     uint64
}
811
// SGXSupport describes Software Guard Extensions support as collected by
// hasSGX. When Available is false the remaining fields keep their zero values.
type SGXSupport struct {
	Available           bool
	LaunchControl       bool
	SGX1Supported       bool
	SGX2Supported       bool
	// MaxEnclaveSizeNot64 and MaxEnclaveSize64 are stored as 1 << n, where n
	// comes from CPUID.(EAX=0x12,ECX=0):EDX bits 7:0 and 15:8 respectively.
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC sections found in sub-leaves 2 and up.
	EPCSections         []SGXEPCSection
}
821
822func hasSGX(available, lc bool) (rval SGXSupport) {
823	rval.Available = available
824
825	if !available {
826		return
827	}
828
829	rval.LaunchControl = lc
830
831	a, _, _, d := cpuidex(0x12, 0)
832	rval.SGX1Supported = a&0x01 != 0
833	rval.SGX2Supported = a&0x02 != 0
834	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
835	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
836	rval.EPCSections = make([]SGXEPCSection, 0)
837
838	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
839		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
840		leafType := eax & 0xf
841
842		if leafType == 0 {
843			// Invalid subleaf, stop iterating
844			break
845		} else if leafType == 1 {
846			// EPC Section subleaf
847			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
848			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
849
850			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
851			rval.EPCSections = append(rval.EPCSections, section)
852		}
853	}
854
855	return
856}
857
// support queries CPUID (and XGETBV where OS support matters) and returns
// the set of detected x86 features. An empty set is returned when CPUID
// leaf 1 is unavailable.
//
// The checks run in this order: leaf 1 feature bits, AVX with OS support,
// FMA3, leaf 7 features (including AVX-512 gated on OS state), then the
// extended leaves 0x80000001, 0x80000007, 0x80000008 and 0x8000001b.
// Later checks rely on flags set by earlier ones (e.g. AVX2, XOP and FMA4
// require AVX; the IBS sub-features require IBS).
func support() flagSet {
	var fs flagSet
	mfi := maxFunctionID()
	vend, _ := vendorID()
	if mfi < 0x1 {
		return fs
	}
	family, model := familyModel()

	// Leaf 1: c = ECX, d = EDX feature bits.
	_, _, c, d := cpuid(1)
	fs.setIf((d&(1<<15)) != 0, CMOV)
	fs.setIf((d&(1<<23)) != 0, MMX)
	fs.setIf((d&(1<<25)) != 0, MMXEXT)
	fs.setIf((d&(1<<25)) != 0, SSE)
	fs.setIf((d&(1<<26)) != 0, SSE2)
	fs.setIf((c&1) != 0, SSE3)
	fs.setIf((c&(1<<5)) != 0, VMX)
	fs.setIf((c&0x00000200) != 0, SSSE3)
	fs.setIf((c&0x00080000) != 0, SSE4)
	fs.setIf((c&0x00100000) != 0, SSE42)
	fs.setIf((c&(1<<25)) != 0, AESNI)
	fs.setIf((c&(1<<1)) != 0, CLMUL)
	fs.setIf(c&(1<<23) != 0, POPCNT)
	fs.setIf(c&(1<<30) != 0, RDRAND)

	// This bit has been reserved by Intel & AMD for use by hypervisors,
	// and indicates the presence of a hypervisor.
	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
	fs.setIf(c&(1<<29) != 0, F16C)
	fs.setIf(c&(1<<13) != 0, CX16)

	// HTT is only reported when EDX bit 28 is set and more than one thread
	// per core is actually detected.
	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
		fs.setIf(threadsPerCore() > 1, HTT)
	}
	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
		fs.setIf(threadsPerCore() > 1, HTT)
	}
	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
	const avxCheck = 1<<26 | 1<<27 | 1<<28
	if c&avxCheck == avxCheck {
		// Check for OS support
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			fs.set(AVX)
			switch vend {
			case Intel:
				// Older than Haswell.
				fs.setIf(family == 6 && model < 60, AVXSLOW)
			case AMD:
				// Older than Zen 2
				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
			}
		}
	}
	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
	// fma3 and OSXSAVE needed.
	const fma3Check = 1<<12 | 1<<27
	fs.setIf(c&fma3Check == fma3Check, FMA3)

	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		_, ebx, ecx, edx := cpuidex(7, 0)
		eax1, _, _, _ := cpuidex(7, 1)
		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
			fs.set(AVX2)
		}
		// CPUID.(EAX=7, ECX=0).EBX
		// BMI2 is only considered when BMI1 is present.
		if (ebx & 0x00000008) != 0 {
			fs.set(BMI1)
			fs.setIf((ebx&0x00000100) != 0, BMI2)
		}
		fs.setIf(ebx&(1<<2) != 0, SGX)
		fs.setIf(ebx&(1<<4) != 0, HLE)
		fs.setIf(ebx&(1<<9) != 0, ERMS)
		fs.setIf(ebx&(1<<11) != 0, RTM)
		fs.setIf(ebx&(1<<14) != 0, MPX)
		fs.setIf(ebx&(1<<18) != 0, RDSEED)
		fs.setIf(ebx&(1<<19) != 0, ADX)
		fs.setIf(ebx&(1<<29) != 0, SHA)
		// CPUID.(EAX=7, ECX=0).ECX
		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
		fs.setIf(ecx&(1<<30) != 0, SGXLC)
		// CPUID.(EAX=7, ECX=0).EDX
		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
		fs.setIf(edx&(1<<26) != 0, IBPB)
		fs.setIf(edx&(1<<27) != 0, STIBP)

		// Only detect AVX-512 features if XGETBV is supported
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)

			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
			// On darwin the XCR0 result is replaced by AVX plus the darwinHasAVX512 hook.
			if runtime.GOOS == "darwin" {
				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
			}
			if hasAVX512 {
				fs.setIf(ebx&(1<<16) != 0, AVX512F)
				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
				// ecx
				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
				fs.setIf(ecx&(1<<8) != 0, GFNI)
				fs.setIf(ecx&(1<<9) != 0, VAES)
				fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
				// edx
				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
				fs.setIf(edx&(1<<22) != 0, AMXBF16)
				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
				fs.setIf(edx&(1<<24) != 0, AMXTILE)
				fs.setIf(edx&(1<<25) != 0, AMXINT8)
				// eax1 = CPUID.(EAX=7, ECX=1).EAX
				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
			}
		}
	}

	// Extended leaf 0x80000001: c = ECX, d = EDX (shadowing the leaf 1 values).
	if maxExtendedFunction() >= 0x80000001 {
		_, _, c, d := cpuid(0x80000001)
		// The LZCNT bit also turns on POPCNT.
		if (c & (1 << 5)) != 0 {
			fs.set(LZCNT)
			fs.set(POPCNT)
		}
		fs.setIf((c&(1<<10)) != 0, IBS)
		fs.setIf((d&(1<<31)) != 0, AMD3DNOW)
		fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT)
		fs.setIf((d&(1<<23)) != 0, MMX)
		fs.setIf((d&(1<<22)) != 0, MMXEXT)
		fs.setIf((c&(1<<6)) != 0, SSE4A)
		fs.setIf(d&(1<<20) != 0, NX)
		fs.setIf(d&(1<<27) != 0, RDTSCP)

		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if fs.inSet(AVX) {
			fs.setIf((c&0x00000800) != 0, XOP)
			fs.setIf((c&0x00010000) != 0, FMA4)
		}

	}
	// Extended leaf 0x80000007: b = EBX, d = EDX.
	if maxExtendedFunction() >= 0x80000007 {
		_, b, _, d := cpuid(0x80000007)
		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
		fs.setIf((b&(1<<1)) != 0, SUCCOR)
		fs.setIf((b&(1<<2)) != 0, HWA)
		fs.setIf((d&(1<<9)) != 0, CPBOOST)
	}

	// Extended leaf 0x80000008: b = EBX.
	if maxExtendedFunction() >= 0x80000008 {
		_, b, _, _ := cpuid(0x80000008)
		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
		fs.setIf((b&(1<<4)) != 0, RDPRU)
		fs.setIf((b&(1<<3)) != 0, INVLPGB)
		fs.setIf((b&(1<<1)) != 0, MSRIRC)
		fs.setIf((b&(1<<0)) != 0, CLZERO)
	}

	// Extended leaf 0x8000001b is only consulted when IBS was detected above.
	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
		eax, _, _, _ := cpuid(0x8000001b)
		fs.setIf((eax>>0)&1 == 1, IBSFFV)
		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
	}

	return fs
}
1049
// valAsString unpacks each value into four bytes, least significant byte
// first, and concatenates them. Output stops at the first zero byte, which
// is not included. The result is never nil.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				// NUL terminator: everything collected so far is the result.
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
1071