1// Generated, DO NOT EDIT,
2// but copy it to your own project and rename the package.
3// See more at http://github.com/klauspost/cpuid
4
5package cpuid
6
7import "strings"
8
// vendor identifies a CPU (or hypervisor) vendor.
// Its values are the constants other, intel, amd, ... declared in this file.
type vendor int
11
// Known vendor values. other is the zero value and is what vendorID
// returns when the CPUID vendor string is not present in vendorMapping.
const (
	other vendor = iota
	intel
	amd
	via
	transmeta
	nsc
	kvm  // Kernel-based Virtual Machine
	msvm // Microsoft Hyper-V or Windows Virtual PC
	vmware
	xenhvm
	bhyve
	hygon
)
26
// CPU feature flags. Every constant is a single bit (1 << iota), so values
// can be combined with | and tested with & against a flags value.
// The constants are untyped; the order below fixes each bit position.
const (
	cmov               = 1 << iota // i686 CMOV
	nx                             // NX (No-Execute) bit
	amd3dnow                       // AMD 3DNOW
	amd3dnowext                    // AMD 3DNowExt
	mmx                            // Standard MMX
	mmxext                         // SSE integer functions or AMD MMX ext
	sse                            // SSE functions
	sse2                           // P4 SSE2 functions
	sse3                           // Prescott SSE3 functions
	ssse3                          // Conroe SSSE3 functions
	sse4                           // Penryn SSE4.1 functions
	sse4a                          // AMD Barcelona microarchitecture SSE4a instructions
	sse42                          // Nehalem SSE4.2 functions
	avx                            // AVX functions
	avx2                           // AVX2 functions
	fma3                           // Intel FMA 3
	fma4                           // Bulldozer FMA4 functions
	xop                            // Bulldozer XOP functions
	f16c                           // Half-precision floating-point conversion
	bmi1                           // Bit Manipulation Instruction Set 1
	bmi2                           // Bit Manipulation Instruction Set 2
	tbm                            // AMD Trailing Bit Manipulation
	lzcnt                          // LZCNT instruction
	popcnt                         // POPCNT instruction
	aesni                          // Advanced Encryption Standard New Instructions
	clmul                          // Carry-less Multiplication
	htt                            // Hyperthreading (enabled)
	hle                            // Hardware Lock Elision
	rtm                            // Restricted Transactional Memory
	rdrand                         // RDRAND instruction is available
	rdseed                         // RDSEED instruction is available
	adx                            // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha                            // Intel SHA Extensions
	avx512f                        // AVX-512 Foundation
	avx512dq                       // AVX-512 Doubleword and Quadword Instructions
	avx512ifma                     // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf                       // AVX-512 Prefetch Instructions
	avx512er                       // AVX-512 Exponential and Reciprocal Instructions
	avx512cd                       // AVX-512 Conflict Detection Instructions
	avx512bw                       // AVX-512 Byte and Word Instructions
	avx512vl                       // AVX-512 Vector Length Extensions
	avx512vbmi                     // AVX-512 Vector Bit Manipulation Instructions
	avx512vbmi2                    // AVX-512 Vector Bit Manipulation Instructions, Version 2
	avx512vnni                     // AVX-512 Vector Neural Network Instructions
	avx512vpopcntdq                // AVX-512 Vector Population Count Doubleword and Quadword
	gfni                           // Galois Field New Instructions
	vaes                           // Vector AES
	avx512bitalg                   // AVX-512 Bit Algorithms
	vpclmulqdq                     // Carry-Less Multiplication Quadword
	avx512bf16                     // AVX-512 BFLOAT16 Instructions
	avx512vp2intersect             // AVX-512 Intersect for D/Q
	mpx                            // Intel MPX (Memory Protection Extensions)
	erms                           // Enhanced REP MOVSB/STOSB
	rdtscp                         // RDTSCP Instruction
	cx16                           // CMPXCHG16B Instruction
	sgx                            // Software Guard Extensions
	sgxlc                          // Software Guard Extensions Launch Control
	ibpb                           // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	stibp                          // Single Thread Indirect Branch Predictors
	vmx                            // Virtual Machine Extensions

	// Performance indicators
	sse2slow // SSE2 is supported, but usually not faster
	sse3slow // SSE3 is supported, but usually not faster
	atom     // Atom processor, some SSSE3 instructions are slower
)
94
// flagNames maps each single-bit feature flag to its display name.
// flags.strings looks names up here, one bit at a time.
var flagNames = map[flags]string{
	cmov:               "CMOV",               // i686 CMOV
	nx:                 "NX",                 // NX (No-Execute) bit
	amd3dnow:           "AMD3DNOW",           // AMD 3DNOW
	amd3dnowext:        "AMD3DNOWEXT",        // AMD 3DNowExt
	mmx:                "MMX",                // Standard MMX
	mmxext:             "MMXEXT",             // SSE integer functions or AMD MMX ext
	sse:                "SSE",                // SSE functions
	sse2:               "SSE2",               // P4 SSE2 functions
	sse3:               "SSE3",               // Prescott SSE3 functions
	ssse3:              "SSSE3",              // Conroe SSSE3 functions
	sse4:               "SSE4.1",             // Penryn SSE4.1 functions
	sse4a:              "SSE4A",              // AMD Barcelona microarchitecture SSE4a instructions
	sse42:              "SSE4.2",             // Nehalem SSE4.2 functions
	avx:                "AVX",                // AVX functions
	avx2:               "AVX2",               // AVX2 functions
	fma3:               "FMA3",               // Intel FMA 3
	fma4:               "FMA4",               // Bulldozer FMA4 functions
	xop:                "XOP",                // Bulldozer XOP functions
	f16c:               "F16C",               // Half-precision floating-point conversion
	bmi1:               "BMI1",               // Bit Manipulation Instruction Set 1
	bmi2:               "BMI2",               // Bit Manipulation Instruction Set 2
	tbm:                "TBM",                // AMD Trailing Bit Manipulation
	lzcnt:              "LZCNT",              // LZCNT instruction
	popcnt:             "POPCNT",             // POPCNT instruction
	aesni:              "AESNI",              // Advanced Encryption Standard New Instructions
	clmul:              "CLMUL",              // Carry-less Multiplication
	htt:                "HTT",                // Hyperthreading (enabled)
	hle:                "HLE",                // Hardware Lock Elision
	rtm:                "RTM",                // Restricted Transactional Memory
	rdrand:             "RDRAND",             // RDRAND instruction is available
	rdseed:             "RDSEED",             // RDSEED instruction is available
	adx:                "ADX",                // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha:                "SHA",                // Intel SHA Extensions
	avx512f:            "AVX512F",            // AVX-512 Foundation
	avx512dq:           "AVX512DQ",           // AVX-512 Doubleword and Quadword Instructions
	avx512ifma:         "AVX512IFMA",         // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf:           "AVX512PF",           // AVX-512 Prefetch Instructions
	avx512er:           "AVX512ER",           // AVX-512 Exponential and Reciprocal Instructions
	avx512cd:           "AVX512CD",           // AVX-512 Conflict Detection Instructions
	avx512bw:           "AVX512BW",           // AVX-512 Byte and Word Instructions
	avx512vl:           "AVX512VL",           // AVX-512 Vector Length Extensions
	avx512vbmi:         "AVX512VBMI",         // AVX-512 Vector Bit Manipulation Instructions
	avx512vbmi2:        "AVX512VBMI2",        // AVX-512 Vector Bit Manipulation Instructions, Version 2
	avx512vnni:         "AVX512VNNI",         // AVX-512 Vector Neural Network Instructions
	avx512vpopcntdq:    "AVX512VPOPCNTDQ",    // AVX-512 Vector Population Count Doubleword and Quadword
	gfni:               "GFNI",               // Galois Field New Instructions
	vaes:               "VAES",               // Vector AES
	avx512bitalg:       "AVX512BITALG",       // AVX-512 Bit Algorithms
	vpclmulqdq:         "VPCLMULQDQ",         // Carry-Less Multiplication Quadword
	avx512bf16:         "AVX512BF16",         // AVX-512 BFLOAT16 Instructions
	avx512vp2intersect: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
	mpx:                "MPX",                // Intel MPX (Memory Protection Extensions)
	erms:               "ERMS",               // Enhanced REP MOVSB/STOSB
	rdtscp:             "RDTSCP",             // RDTSCP Instruction
	cx16:               "CX16",               // CMPXCHG16B Instruction
	sgx:                "SGX",                // Software Guard Extensions
	sgxlc:              "SGXLC",              // Software Guard Extensions Launch Control
	ibpb:               "IBPB",               // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	stibp:              "STIBP",              // Single Thread Indirect Branch Predictors
	vmx:                "VMX",                // Virtual Machine Extensions

	// Performance indicators
	sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
	sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
	atom:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
163
// cpuInfo contains information about the detected system CPU.
// All fields are filled in by detect.
type cpuInfo struct {
	brandname      string // Brand name reported by the CPU
	vendorid       vendor // Comparable CPU vendor ID
	features       flags  // Features of the CPU
	physicalcores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	threadspercore int    // Number of threads per physical core. Will be 1 if undetectable.
	logicalcores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	family         int    // CPU family number
	model          int    // CPU model number
	cacheline      int    // Cache line size in bytes. Will be 0 if undetectable.
	cache          struct {
		l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
		l2  int // L2 Cache (per core or shared). Will be -1 if undetected
		l3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	sgx       sgxsupport // SGX details as returned by hasSGX
	maxFunc   uint32     // Value returned by maxFunctionID (EAX of CPUID leaf 0)
	maxExFunc uint32     // Value returned by maxExtendedFunction (EAX of CPUID leaf 0x80000000)
}
185
// Low-level hooks used by all detection code in this file. They are only
// declared here; presumably initCPU (called from init) assigns the
// platform implementations — TODO confirm.

// cpuid queries CPUID leaf op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex queries CPUID leaf op with sub-leaf op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index
// (index 0 is used in this file to read XCR0).
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm backs rtcounter and ia32tscaux: eax/edx are combined into the
// 64-bit counter and ecx is returned as IA32_TSC_AUX.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
190
// cpu contains information about the CPU as detected on startup,
// or when detect was last called.
//
// Use this as the primary entry point to your data.
var cpu cpuInfo
197
// init runs once at package load: it calls initCPU and then detect,
// which populates the package-level cpu variable.
func init() {
	initCPU()
	detect()
}
202
203// Detect will re-detect current CPU info.
204// This will replace the content of the exported CPU variable.
205//
206// Unless you expect the CPU to change while you are running your program
207// you should not need to call this function.
208// If you call this, you must ensure that no other goroutine is accessing the
209// exported CPU variable.
210func detect() {
211	cpu.maxFunc = maxFunctionID()
212	cpu.maxExFunc = maxExtendedFunction()
213	cpu.brandname = brandName()
214	cpu.cacheline = cacheLine()
215	cpu.family, cpu.model = familyModel()
216	cpu.features = support()
217	cpu.sgx = hasSGX(cpu.features&sgx != 0, cpu.features&sgxlc != 0)
218	cpu.threadspercore = threadsPerCore()
219	cpu.logicalcores = logicalCores()
220	cpu.physicalcores = physicalCores()
221	cpu.vendorid = vendorID()
222	cpu.cacheSize()
223}
224
225// Generated here: http://play.golang.org/p/BxFH2Gdc0G
226
227// Cmov indicates support of CMOV instructions
228func (c cpuInfo) cmov() bool {
229	return c.features&cmov != 0
230}
231
232// Amd3dnow indicates support of AMD 3DNOW! instructions
233func (c cpuInfo) amd3dnow() bool {
234	return c.features&amd3dnow != 0
235}
236
237// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
238func (c cpuInfo) amd3dnowext() bool {
239	return c.features&amd3dnowext != 0
240}
241
242// VMX indicates support of VMX
243func (c cpuInfo) vmx() bool {
244	return c.features&vmx != 0
245}
246
247// MMX indicates support of MMX instructions
248func (c cpuInfo) mmx() bool {
249	return c.features&mmx != 0
250}
251
252// MMXExt indicates support of MMXEXT instructions
253// (SSE integer functions or AMD MMX ext)
254func (c cpuInfo) mmxext() bool {
255	return c.features&mmxext != 0
256}
257
258// SSE indicates support of SSE instructions
259func (c cpuInfo) sse() bool {
260	return c.features&sse != 0
261}
262
263// SSE2 indicates support of SSE 2 instructions
264func (c cpuInfo) sse2() bool {
265	return c.features&sse2 != 0
266}
267
268// SSE3 indicates support of SSE 3 instructions
269func (c cpuInfo) sse3() bool {
270	return c.features&sse3 != 0
271}
272
273// SSSE3 indicates support of SSSE 3 instructions
274func (c cpuInfo) ssse3() bool {
275	return c.features&ssse3 != 0
276}
277
278// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
279func (c cpuInfo) sse4() bool {
280	return c.features&sse4 != 0
281}
282
283// SSE42 indicates support of SSE4.2 instructions
284func (c cpuInfo) sse42() bool {
285	return c.features&sse42 != 0
286}
287
288// AVX indicates support of AVX instructions
289// and operating system support of AVX instructions
290func (c cpuInfo) avx() bool {
291	return c.features&avx != 0
292}
293
294// AVX2 indicates support of AVX2 instructions
295func (c cpuInfo) avx2() bool {
296	return c.features&avx2 != 0
297}
298
299// FMA3 indicates support of FMA3 instructions
300func (c cpuInfo) fma3() bool {
301	return c.features&fma3 != 0
302}
303
304// FMA4 indicates support of FMA4 instructions
305func (c cpuInfo) fma4() bool {
306	return c.features&fma4 != 0
307}
308
309// XOP indicates support of XOP instructions
310func (c cpuInfo) xop() bool {
311	return c.features&xop != 0
312}
313
314// F16C indicates support of F16C instructions
315func (c cpuInfo) f16c() bool {
316	return c.features&f16c != 0
317}
318
319// BMI1 indicates support of BMI1 instructions
320func (c cpuInfo) bmi1() bool {
321	return c.features&bmi1 != 0
322}
323
324// BMI2 indicates support of BMI2 instructions
325func (c cpuInfo) bmi2() bool {
326	return c.features&bmi2 != 0
327}
328
329// TBM indicates support of TBM instructions
330// (AMD Trailing Bit Manipulation)
331func (c cpuInfo) tbm() bool {
332	return c.features&tbm != 0
333}
334
335// Lzcnt indicates support of LZCNT instruction
336func (c cpuInfo) lzcnt() bool {
337	return c.features&lzcnt != 0
338}
339
340// Popcnt indicates support of POPCNT instruction
341func (c cpuInfo) popcnt() bool {
342	return c.features&popcnt != 0
343}
344
345// HTT indicates the processor has Hyperthreading enabled
346func (c cpuInfo) htt() bool {
347	return c.features&htt != 0
348}
349
350// SSE2Slow indicates that SSE2 may be slow on this processor
351func (c cpuInfo) sse2slow() bool {
352	return c.features&sse2slow != 0
353}
354
355// SSE3Slow indicates that SSE3 may be slow on this processor
356func (c cpuInfo) sse3slow() bool {
357	return c.features&sse3slow != 0
358}
359
360// AesNi indicates support of AES-NI instructions
361// (Advanced Encryption Standard New Instructions)
362func (c cpuInfo) aesni() bool {
363	return c.features&aesni != 0
364}
365
366// Clmul indicates support of CLMUL instructions
367// (Carry-less Multiplication)
368func (c cpuInfo) clmul() bool {
369	return c.features&clmul != 0
370}
371
372// NX indicates support of NX (No-Execute) bit
373func (c cpuInfo) nx() bool {
374	return c.features&nx != 0
375}
376
377// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
378func (c cpuInfo) sse4a() bool {
379	return c.features&sse4a != 0
380}
381
382// HLE indicates support of Hardware Lock Elision
383func (c cpuInfo) hle() bool {
384	return c.features&hle != 0
385}
386
387// RTM indicates support of Restricted Transactional Memory
388func (c cpuInfo) rtm() bool {
389	return c.features&rtm != 0
390}
391
392// Rdrand indicates support of RDRAND instruction is available
393func (c cpuInfo) rdrand() bool {
394	return c.features&rdrand != 0
395}
396
397// Rdseed indicates support of RDSEED instruction is available
398func (c cpuInfo) rdseed() bool {
399	return c.features&rdseed != 0
400}
401
402// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
403func (c cpuInfo) adx() bool {
404	return c.features&adx != 0
405}
406
407// SHA indicates support of Intel SHA Extensions
408func (c cpuInfo) sha() bool {
409	return c.features&sha != 0
410}
411
412// AVX512F indicates support of AVX-512 Foundation
413func (c cpuInfo) avx512f() bool {
414	return c.features&avx512f != 0
415}
416
417// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
418func (c cpuInfo) avx512dq() bool {
419	return c.features&avx512dq != 0
420}
421
422// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
423func (c cpuInfo) avx512ifma() bool {
424	return c.features&avx512ifma != 0
425}
426
427// AVX512PF indicates support of AVX-512 Prefetch Instructions
428func (c cpuInfo) avx512pf() bool {
429	return c.features&avx512pf != 0
430}
431
432// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
433func (c cpuInfo) avx512er() bool {
434	return c.features&avx512er != 0
435}
436
437// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
438func (c cpuInfo) avx512cd() bool {
439	return c.features&avx512cd != 0
440}
441
442// AVX512BW indicates support of AVX-512 Byte and Word Instructions
443func (c cpuInfo) avx512bw() bool {
444	return c.features&avx512bw != 0
445}
446
447// AVX512VL indicates support of AVX-512 Vector Length Extensions
448func (c cpuInfo) avx512vl() bool {
449	return c.features&avx512vl != 0
450}
451
452// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
453func (c cpuInfo) avx512vbmi() bool {
454	return c.features&avx512vbmi != 0
455}
456
457// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
458func (c cpuInfo) avx512vbmi2() bool {
459	return c.features&avx512vbmi2 != 0
460}
461
462// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
463func (c cpuInfo) avx512vnni() bool {
464	return c.features&avx512vnni != 0
465}
466
467// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
468func (c cpuInfo) avx512vpopcntdq() bool {
469	return c.features&avx512vpopcntdq != 0
470}
471
472// GFNI indicates support of Galois Field New Instructions
473func (c cpuInfo) gfni() bool {
474	return c.features&gfni != 0
475}
476
477// VAES indicates support of Vector AES
478func (c cpuInfo) vaes() bool {
479	return c.features&vaes != 0
480}
481
482// AVX512BITALG indicates support of AVX-512 Bit Algorithms
483func (c cpuInfo) avx512bitalg() bool {
484	return c.features&avx512bitalg != 0
485}
486
487// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
488func (c cpuInfo) vpclmulqdq() bool {
489	return c.features&vpclmulqdq != 0
490}
491
492// AVX512BF16 indicates support of
493func (c cpuInfo) avx512bf16() bool {
494	return c.features&avx512bf16 != 0
495}
496
497// AVX512VP2INTERSECT indicates support of
498func (c cpuInfo) avx512vp2intersect() bool {
499	return c.features&avx512vp2intersect != 0
500}
501
502// MPX indicates support of Intel MPX (Memory Protection Extensions)
503func (c cpuInfo) mpx() bool {
504	return c.features&mpx != 0
505}
506
507// ERMS indicates support of Enhanced REP MOVSB/STOSB
508func (c cpuInfo) erms() bool {
509	return c.features&erms != 0
510}
511
512// RDTSCP Instruction is available.
513func (c cpuInfo) rdtscp() bool {
514	return c.features&rdtscp != 0
515}
516
517// CX16 indicates if CMPXCHG16B instruction is available.
518func (c cpuInfo) cx16() bool {
519	return c.features&cx16 != 0
520}
521
522// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
523// So TSX simply checks that.
524func (c cpuInfo) tsx() bool {
525	return c.features&(hle|rtm) == hle|rtm
526}
527
528// Atom indicates an Atom processor
529func (c cpuInfo) atom() bool {
530	return c.features&atom != 0
531}
532
533// Intel returns true if vendor is recognized as Intel
534func (c cpuInfo) intel() bool {
535	return c.vendorid == intel
536}
537
538// AMD returns true if vendor is recognized as AMD
539func (c cpuInfo) amd() bool {
540	return c.vendorid == amd
541}
542
543// Hygon returns true if vendor is recognized as Hygon
544func (c cpuInfo) hygon() bool {
545	return c.vendorid == hygon
546}
547
548// Transmeta returns true if vendor is recognized as Transmeta
549func (c cpuInfo) transmeta() bool {
550	return c.vendorid == transmeta
551}
552
553// NSC returns true if vendor is recognized as National Semiconductor
554func (c cpuInfo) nsc() bool {
555	return c.vendorid == nsc
556}
557
558// VIA returns true if vendor is recognized as VIA
559func (c cpuInfo) via() bool {
560	return c.vendorid == via
561}
562
563// RTCounter returns the 64-bit time-stamp counter
564// Uses the RDTSCP instruction. The value 0 is returned
565// if the CPU does not support the instruction.
566func (c cpuInfo) rtcounter() uint64 {
567	if !c.rdtscp() {
568		return 0
569	}
570	a, _, _, d := rdtscpAsm()
571	return uint64(a) | (uint64(d) << 32)
572}
573
574// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
575// This variable is OS dependent, but on Linux contains information
576// about the current cpu/core the code is running on.
577// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
578func (c cpuInfo) ia32tscaux() uint32 {
579	if !c.rdtscp() {
580		return 0
581	}
582	_, _, ecx, _ := rdtscpAsm()
583	return ecx
584}
585
586// LogicalCPU will return the Logical CPU the code is currently executing on.
587// This is likely to change when the OS re-schedules the running thread
588// to another CPU.
589// If the current core cannot be detected, -1 will be returned.
590func (c cpuInfo) logicalcpu() int {
591	if c.maxFunc < 1 {
592		return -1
593	}
594	_, ebx, _, _ := cpuid(1)
595	return int(ebx >> 24)
596}
597
598// VM Will return true if the cpu id indicates we are in
599// a virtual machine. This is only a hint, and will very likely
600// have many false negatives.
601func (c cpuInfo) vm() bool {
602	switch c.vendorid {
603	case msvm, kvm, vmware, xenhvm, bhyve:
604		return true
605	}
606	return false
607}
608
// flags is a bit set of detected CPU features and characteristics.
// Each bit corresponds to one of the feature constants (cmov, nx, ...).
type flags uint64
611
612// String returns a string representation of the detected
613// CPU features.
614func (f flags) String() string {
615	return strings.Join(f.strings(), ",")
616}
617
618// Strings returns and array of the detected features.
619func (f flags) strings() []string {
620	s := support()
621	r := make([]string, 0, 20)
622	for i := uint(0); i < 64; i++ {
623		key := flags(1 << i)
624		val := flagNames[key]
625		if s&key != 0 {
626			r = append(r, val)
627		}
628	}
629	return r
630}
631
632func maxExtendedFunction() uint32 {
633	eax, _, _, _ := cpuid(0x80000000)
634	return eax
635}
636
637func maxFunctionID() uint32 {
638	a, _, _, _ := cpuid(0)
639	return a
640}
641
642func brandName() string {
643	if maxExtendedFunction() >= 0x80000004 {
644		v := make([]uint32, 0, 48)
645		for i := uint32(0); i < 3; i++ {
646			a, b, c, d := cpuid(0x80000002 + i)
647			v = append(v, a, b, c, d)
648		}
649		return strings.Trim(string(valAsString(v...)), " ")
650	}
651	return "unknown"
652}
653
654func threadsPerCore() int {
655	mfi := maxFunctionID()
656	if mfi < 0x4 || vendorID() != intel {
657		return 1
658	}
659
660	if mfi < 0xb {
661		_, b, _, d := cpuid(1)
662		if (d & (1 << 28)) != 0 {
663			// v will contain logical core count
664			v := (b >> 16) & 255
665			if v > 1 {
666				a4, _, _, _ := cpuid(4)
667				// physical cores
668				v2 := (a4 >> 26) + 1
669				if v2 > 0 {
670					return int(v) / int(v2)
671				}
672			}
673		}
674		return 1
675	}
676	_, b, _, _ := cpuidex(0xb, 0)
677	if b&0xffff == 0 {
678		return 1
679	}
680	return int(b & 0xffff)
681}
682
683func logicalCores() int {
684	mfi := maxFunctionID()
685	switch vendorID() {
686	case intel:
687		// Use this on old Intel processors
688		if mfi < 0xb {
689			if mfi < 1 {
690				return 0
691			}
692			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
693			// that can be assigned to logical processors in a physical package.
694			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
695			_, ebx, _, _ := cpuid(1)
696			logical := (ebx >> 16) & 0xff
697			return int(logical)
698		}
699		_, b, _, _ := cpuidex(0xb, 1)
700		return int(b & 0xffff)
701	case amd, hygon:
702		_, b, _, _ := cpuid(1)
703		return int((b >> 16) & 0xff)
704	default:
705		return 0
706	}
707}
708
709func familyModel() (int, int) {
710	if maxFunctionID() < 0x1 {
711		return 0, 0
712	}
713	eax, _, _, _ := cpuid(1)
714	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
715	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
716	return int(family), int(model)
717}
718
719func physicalCores() int {
720	switch vendorID() {
721	case intel:
722		return logicalCores() / threadsPerCore()
723	case amd, hygon:
724		if maxExtendedFunction() >= 0x80000008 {
725			_, _, c, _ := cpuid(0x80000008)
726			return int(c&0xff) + 1
727		}
728	}
729	return 0
730}
731
// vendorMapping maps the 12-character CPUID vendor string to a vendor
// value. vendorID returns other for strings not listed here.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]vendor{
	"AMDisbetter!": amd,
	"AuthenticAMD": amd,
	"CentaurHauls": via,
	"GenuineIntel": intel,
	"TransmetaCPU": transmeta,
	"GenuineTMx86": transmeta,
	"Geode by NSC": nsc,
	"VIA VIA VIA ": via,
	"KVMKVMKVMKVM": kvm,
	"Microsoft Hv": msvm,
	"VMwareVMware": vmware,
	"XenVMMXenVMM": xenhvm,
	"bhyve bhyve ": bhyve,
	"HygonGenuine": hygon,
}
749
750func vendorID() vendor {
751	_, b, c, d := cpuid(0)
752	v := valAsString(b, d, c)
753	vend, ok := vendorMapping[string(v)]
754	if !ok {
755		return other
756	}
757	return vend
758}
759
760func cacheLine() int {
761	if maxFunctionID() < 0x1 {
762		return 0
763	}
764
765	_, ebx, _, _ := cpuid(1)
766	cache := (ebx & 0xff00) >> 5 // cflush size
767	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
768		_, _, ecx, _ := cpuid(0x80000006)
769		cache = ecx & 0xff // cacheline size
770	}
771	// TODO: Read from Cache and TLB Information
772	return int(cache)
773}
774
775func (c *cpuInfo) cacheSize() {
776	c.cache.l1d = -1
777	c.cache.l1i = -1
778	c.cache.l2 = -1
779	c.cache.l3 = -1
780	vendor := vendorID()
781	switch vendor {
782	case intel:
783		if maxFunctionID() < 4 {
784			return
785		}
786		for i := uint32(0); ; i++ {
787			eax, ebx, ecx, _ := cpuidex(4, i)
788			cacheType := eax & 15
789			if cacheType == 0 {
790				break
791			}
792			cacheLevel := (eax >> 5) & 7
793			coherency := int(ebx&0xfff) + 1
794			partitions := int((ebx>>12)&0x3ff) + 1
795			associativity := int((ebx>>22)&0x3ff) + 1
796			sets := int(ecx) + 1
797			size := associativity * partitions * coherency * sets
798			switch cacheLevel {
799			case 1:
800				if cacheType == 1 {
801					// 1 = Data Cache
802					c.cache.l1d = size
803				} else if cacheType == 2 {
804					// 2 = Instruction Cache
805					c.cache.l1i = size
806				} else {
807					if c.cache.l1d < 0 {
808						c.cache.l1i = size
809					}
810					if c.cache.l1i < 0 {
811						c.cache.l1i = size
812					}
813				}
814			case 2:
815				c.cache.l2 = size
816			case 3:
817				c.cache.l3 = size
818			}
819		}
820	case amd, hygon:
821		// Untested.
822		if maxExtendedFunction() < 0x80000005 {
823			return
824		}
825		_, _, ecx, edx := cpuid(0x80000005)
826		c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
827		c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
828
829		if maxExtendedFunction() < 0x80000006 {
830			return
831		}
832		_, _, ecx, _ = cpuid(0x80000006)
833		c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
834	}
835
836	return
837}
838
// sgxepcsection describes one EPC section as decoded by hasSGX from a
// sub-leaf of CPUID leaf 0x12.
type sgxepcsection struct {
	baseaddress uint64 // Base address of the section
	epcsize     uint64 // Size of the section
}
843
// sgxsupport holds the SGX information gathered by hasSGX.
// When available is false, all other fields are left at their zero value.
type sgxsupport struct {
	available           bool            // SGX feature flag was detected
	launchcontrol       bool            // SGX Launch Control feature flag was detected
	sgx1supported       bool            // Bit 0 of EAX from CPUID leaf 0x12, sub-leaf 0
	sgx2supported       bool            // Bit 1 of EAX from CPUID leaf 0x12, sub-leaf 0
	maxenclavesizenot64 int64           // 1 << EDX[7:0] of CPUID leaf 0x12, sub-leaf 0
	maxenclavesize64    int64           // 1 << EDX[15:8] of CPUID leaf 0x12, sub-leaf 0
	epcsections         []sgxepcsection // EPC sections found in sub-leaves 2 and up
}
853
854func hasSGX(available, lc bool) (rval sgxsupport) {
855	rval.available = available
856
857	if !available {
858		return
859	}
860
861	rval.launchcontrol = lc
862
863	a, _, _, d := cpuidex(0x12, 0)
864	rval.sgx1supported = a&0x01 != 0
865	rval.sgx2supported = a&0x02 != 0
866	rval.maxenclavesizenot64 = 1 << (d & 0xFF)     // pow 2
867	rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // pow 2
868	rval.epcsections = make([]sgxepcsection, 0)
869
870	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
871		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
872		leafType := eax & 0xf
873
874		if leafType == 0 {
875			// Invalid subleaf, stop iterating
876			break
877		} else if leafType == 1 {
878			// EPC Section subleaf
879			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
880			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
881
882			section := sgxepcsection{baseaddress: baseAddress, epcsize: size}
883			rval.epcsections = append(rval.epcsections, section)
884		}
885	}
886
887	return
888}
889
890func support() flags {
891	mfi := maxFunctionID()
892	vend := vendorID()
893	if mfi < 0x1 {
894		return 0
895	}
896	rval := uint64(0)
897	_, _, c, d := cpuid(1)
898	if (d & (1 << 15)) != 0 {
899		rval |= cmov
900	}
901	if (d & (1 << 23)) != 0 {
902		rval |= mmx
903	}
904	if (d & (1 << 25)) != 0 {
905		rval |= mmxext
906	}
907	if (d & (1 << 25)) != 0 {
908		rval |= sse
909	}
910	if (d & (1 << 26)) != 0 {
911		rval |= sse2
912	}
913	if (c & 1) != 0 {
914		rval |= sse3
915	}
916	if (c & (1 << 5)) != 0 {
917		rval |= vmx
918	}
919	if (c & 0x00000200) != 0 {
920		rval |= ssse3
921	}
922	if (c & 0x00080000) != 0 {
923		rval |= sse4
924	}
925	if (c & 0x00100000) != 0 {
926		rval |= sse42
927	}
928	if (c & (1 << 25)) != 0 {
929		rval |= aesni
930	}
931	if (c & (1 << 1)) != 0 {
932		rval |= clmul
933	}
934	if c&(1<<23) != 0 {
935		rval |= popcnt
936	}
937	if c&(1<<30) != 0 {
938		rval |= rdrand
939	}
940	if c&(1<<29) != 0 {
941		rval |= f16c
942	}
943	if c&(1<<13) != 0 {
944		rval |= cx16
945	}
946	if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
947		if threadsPerCore() > 1 {
948			rval |= htt
949		}
950	}
951
952	// Check XGETBV, OXSAVE and AVX bits
953	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
954		// Check for OS support
955		eax, _ := xgetbv(0)
956		if (eax & 0x6) == 0x6 {
957			rval |= avx
958			if (c & 0x00001000) != 0 {
959				rval |= fma3
960			}
961		}
962	}
963
964	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
965	if mfi >= 7 {
966		_, ebx, ecx, edx := cpuidex(7, 0)
967		eax1, _, _, _ := cpuidex(7, 1)
968		if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
969			rval |= avx2
970		}
971		if (ebx & 0x00000008) != 0 {
972			rval |= bmi1
973			if (ebx & 0x00000100) != 0 {
974				rval |= bmi2
975			}
976		}
977		if ebx&(1<<2) != 0 {
978			rval |= sgx
979		}
980		if ebx&(1<<4) != 0 {
981			rval |= hle
982		}
983		if ebx&(1<<9) != 0 {
984			rval |= erms
985		}
986		if ebx&(1<<11) != 0 {
987			rval |= rtm
988		}
989		if ebx&(1<<14) != 0 {
990			rval |= mpx
991		}
992		if ebx&(1<<18) != 0 {
993			rval |= rdseed
994		}
995		if ebx&(1<<19) != 0 {
996			rval |= adx
997		}
998		if ebx&(1<<29) != 0 {
999			rval |= sha
1000		}
1001		if edx&(1<<26) != 0 {
1002			rval |= ibpb
1003		}
1004		if ecx&(1<<30) != 0 {
1005			rval |= sgxlc
1006		}
1007		if edx&(1<<27) != 0 {
1008			rval |= stibp
1009		}
1010
1011		// Only detect AVX-512 features if XGETBV is supported
1012		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1013			// Check for OS support
1014			eax, _ := xgetbv(0)
1015
1016			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1017			// ZMM16-ZMM31 state are enabled by OS)
1018			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1019			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
1020				if ebx&(1<<16) != 0 {
1021					rval |= avx512f
1022				}
1023				if ebx&(1<<17) != 0 {
1024					rval |= avx512dq
1025				}
1026				if ebx&(1<<21) != 0 {
1027					rval |= avx512ifma
1028				}
1029				if ebx&(1<<26) != 0 {
1030					rval |= avx512pf
1031				}
1032				if ebx&(1<<27) != 0 {
1033					rval |= avx512er
1034				}
1035				if ebx&(1<<28) != 0 {
1036					rval |= avx512cd
1037				}
1038				if ebx&(1<<30) != 0 {
1039					rval |= avx512bw
1040				}
1041				if ebx&(1<<31) != 0 {
1042					rval |= avx512vl
1043				}
1044				// ecx
1045				if ecx&(1<<1) != 0 {
1046					rval |= avx512vbmi
1047				}
1048				if ecx&(1<<6) != 0 {
1049					rval |= avx512vbmi2
1050				}
1051				if ecx&(1<<8) != 0 {
1052					rval |= gfni
1053				}
1054				if ecx&(1<<9) != 0 {
1055					rval |= vaes
1056				}
1057				if ecx&(1<<10) != 0 {
1058					rval |= vpclmulqdq
1059				}
1060				if ecx&(1<<11) != 0 {
1061					rval |= avx512vnni
1062				}
1063				if ecx&(1<<12) != 0 {
1064					rval |= avx512bitalg
1065				}
1066				if ecx&(1<<14) != 0 {
1067					rval |= avx512vpopcntdq
1068				}
1069				// edx
1070				if edx&(1<<8) != 0 {
1071					rval |= avx512vp2intersect
1072				}
1073				// cpuid eax 07h,ecx=1
1074				if eax1&(1<<5) != 0 {
1075					rval |= avx512bf16
1076				}
1077			}
1078		}
1079	}
1080
1081	if maxExtendedFunction() >= 0x80000001 {
1082		_, _, c, d := cpuid(0x80000001)
1083		if (c & (1 << 5)) != 0 {
1084			rval |= lzcnt
1085			rval |= popcnt
1086		}
1087		if (d & (1 << 31)) != 0 {
1088			rval |= amd3dnow
1089		}
1090		if (d & (1 << 30)) != 0 {
1091			rval |= amd3dnowext
1092		}
1093		if (d & (1 << 23)) != 0 {
1094			rval |= mmx
1095		}
1096		if (d & (1 << 22)) != 0 {
1097			rval |= mmxext
1098		}
1099		if (c & (1 << 6)) != 0 {
1100			rval |= sse4a
1101		}
1102		if d&(1<<20) != 0 {
1103			rval |= nx
1104		}
1105		if d&(1<<27) != 0 {
1106			rval |= rdtscp
1107		}
1108
1109		/* Allow for selectively disabling SSE2 functions on AMD processors
1110		   with SSE2 support but not SSE4a. This includes Athlon64, some
1111		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
1112		   than SSE2 often enough to utilize this special-case flag.
1113		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
1114		   so that SSE2 is used unless explicitly disabled by checking
1115		   AV_CPU_FLAG_SSE2SLOW. */
1116		if vendorID() != intel &&
1117			rval&sse2 != 0 && (c&0x00000040) == 0 {
1118			rval |= sse2slow
1119		}
1120
1121		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1122		 * used unless the OS has AVX support. */
1123		if (rval & avx) != 0 {
1124			if (c & 0x00000800) != 0 {
1125				rval |= xop
1126			}
1127			if (c & 0x00010000) != 0 {
1128				rval |= fma4
1129			}
1130		}
1131
1132		if vendorID() == intel {
1133			family, model := familyModel()
1134			if family == 6 && (model == 9 || model == 13 || model == 14) {
1135				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
1136				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
1137				 * usually slower than mmx. */
1138				if (rval & sse2) != 0 {
1139					rval |= sse2slow
1140				}
1141				if (rval & sse3) != 0 {
1142					rval |= sse3slow
1143				}
1144			}
1145			/* The Atom processor has SSSE3 support, which is useful in many cases,
1146			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
1147			 * on the Atom, but is generally faster on other processors supporting
1148			 * SSSE3. This flag allows for selectively disabling certain SSSE3
1149			 * functions on the Atom. */
1150			if family == 6 && model == 28 {
1151				rval |= atom
1152			}
1153		}
1154	}
1155	return flags(rval)
1156}
1157
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and returns the concatenated bytes.
//
// The result is cut at the first zero byte: everything before it is returned
// and the zero byte itself is excluded. If no zero byte occurs, all
// 4*len(values) bytes are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for idx, word := range values {
		base := idx * 4
		chunk := out[base : base+4]
		// Store the whole word first so the backing array holds all four
		// bytes even when the result is truncated inside this word.
		for k := range chunk {
			chunk[k] = byte(word >> (8 * uint(k)))
		}
		// Stop at the first NUL byte, if any.
		for k, b := range chunk {
			if b == 0 {
				return out[:base+k]
			}
		}
	}
	return out
}
1179