1// Generated, DO NOT EDIT,
2// but copy it to your own project and rename the package.
3// See more at http://github.com/klauspost/cpuid
4
5package cpuid
6
7import "strings"
8
// vendor is a comparable identifier for a CPU (or hypervisor) vendor.
// Values are produced by vendorID from the CPUID vendor string.
type vendor int
11
// Known vendor identifiers. other is the zero value and is what vendorID
// returns when the CPUID vendor string is not present in vendorMapping.
const (
	other vendor = iota
	intel
	amd
	via
	transmeta
	nsc
	kvm  // Kernel-based Virtual Machine
	msvm // Microsoft Hyper-V or Windows Virtual PC
	vmware
	xenhvm
)
24
// CPU feature bits. Each constant is a distinct single bit (1 << iota) that
// support() ORs into the flags value stored in cpuInfo.features.
const (
	cmov        = 1 << iota // i686 CMOV
	nx                      // NX (No-Execute) bit
	amd3dnow                // AMD 3DNOW
	amd3dnowext             // AMD 3DNowExt
	mmx                     // standard MMX
	mmxext                  // SSE integer functions or AMD MMX ext
	sse                     // SSE functions
	sse2                    // P4 SSE2 functions
	sse3                    // Prescott SSE3 functions
	ssse3                   // Conroe SSSE3 functions
	sse4                    // Penryn SSE4.1 functions
	sse4a                   // AMD Barcelona microarchitecture SSE4a instructions
	sse42                   // Nehalem SSE4.2 functions
	avx                     // AVX functions
	avx2                    // AVX2 functions
	fma3                    // Intel FMA 3
	fma4                    // Bulldozer FMA4 functions
	xop                     // Bulldozer XOP functions
	f16c                    // Half-precision floating-point conversion
	bmi1                    // Bit Manipulation Instruction Set 1
	bmi2                    // Bit Manipulation Instruction Set 2
	tbm                     // AMD Trailing Bit Manipulation
	lzcnt                   // LZCNT instruction
	popcnt                  // POPCNT instruction
	aesni                   // Advanced Encryption Standard New Instructions
	clmul                   // Carry-less Multiplication
	htt                     // Hyperthreading (enabled)
	hle                     // Hardware Lock Elision
	rtm                     // Restricted Transactional Memory
	rdrand                  // RDRAND instruction is available
	rdseed                  // RDSEED instruction is available
	adx                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha                     // Intel SHA Extensions
	avx512f                 // AVX-512 Foundation
	avx512dq                // AVX-512 Doubleword and Quadword Instructions
	avx512ifma              // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf                // AVX-512 Prefetch Instructions
	avx512er                // AVX-512 Exponential and Reciprocal Instructions
	avx512cd                // AVX-512 Conflict Detection Instructions
	avx512bw                // AVX-512 Byte and Word Instructions
	avx512vl                // AVX-512 Vector Length Extensions
	avx512vbmi              // AVX-512 Vector Bit Manipulation Instructions
	mpx                     // Intel MPX (Memory Protection Extensions)
	erms                    // Enhanced REP MOVSB/STOSB
	rdtscp                  // RDTSCP Instruction
	cx16                    // CMPXCHG16B Instruction
	sgx                     // Software Guard Extensions
	ibpb                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	stibp                   // Single Thread Indirect Branch Predictors

	// Performance indicators
	sse2slow // SSE2 is supported, but usually not faster
	sse3slow // SSE3 is supported, but usually not faster
	atom     // Atom processor, some SSSE3 instructions are slower
)
81
// flagNames maps each single feature bit to the name that flags.strings
// reports for it.
var flagNames = map[flags]string{
	cmov:        "CMOV",        // i686 CMOV
	nx:          "NX",          // NX (No-Execute) bit
	amd3dnow:    "AMD3DNOW",    // AMD 3DNOW
	amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt
	mmx:         "MMX",         // Standard MMX
	mmxext:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	sse:         "SSE",         // SSE functions
	sse2:        "SSE2",        // P4 SSE2 functions
	sse3:        "SSE3",        // Prescott SSE3 functions
	ssse3:       "SSSE3",       // Conroe SSSE3 functions
	sse4:        "SSE4.1",      // Penryn SSE4.1 functions
	sse4a:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	sse42:       "SSE4.2",      // Nehalem SSE4.2 functions
	avx:         "AVX",         // AVX functions
	avx2:        "AVX2",        // AVX2 functions
	fma3:        "FMA3",        // Intel FMA 3
	fma4:        "FMA4",        // Bulldozer FMA4 functions
	xop:         "XOP",         // Bulldozer XOP functions
	f16c:        "F16C",        // Half-precision floating-point conversion
	bmi1:        "BMI1",        // Bit Manipulation Instruction Set 1
	bmi2:        "BMI2",        // Bit Manipulation Instruction Set 2
	tbm:         "TBM",         // AMD Trailing Bit Manipulation
	lzcnt:       "LZCNT",       // LZCNT instruction
	popcnt:      "POPCNT",      // POPCNT instruction
	aesni:       "AESNI",       // Advanced Encryption Standard New Instructions
	clmul:       "CLMUL",       // Carry-less Multiplication
	htt:         "HTT",         // Hyperthreading (enabled)
	hle:         "HLE",         // Hardware Lock Elision
	rtm:         "RTM",         // Restricted Transactional Memory
	rdrand:      "RDRAND",      // RDRAND instruction is available
	rdseed:      "RDSEED",      // RDSEED instruction is available
	adx:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha:         "SHA",         // Intel SHA Extensions
	avx512f:     "AVX512F",     // AVX-512 Foundation
	avx512dq:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	avx512ifma:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf:    "AVX512PF",    // AVX-512 Prefetch Instructions
	avx512er:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	avx512cd:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	avx512bw:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	avx512vl:    "AVX512VL",    // AVX-512 Vector Length Extensions
	avx512vbmi:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	mpx:         "MPX",         // Intel MPX (Memory Protection Extensions)
	erms:        "ERMS",        // Enhanced REP MOVSB/STOSB
	rdtscp:      "RDTSCP",      // RDTSCP Instruction
	cx16:        "CX16",        // CMPXCHG16B Instruction
	sgx:         "SGX",         // Software Guard Extensions
	ibpb:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	stibp:       "STIBP",       // Single Thread Indirect Branch Predictors

	// Performance indicators
	sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
	sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
	atom:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
139
// cpuInfo contains information about the detected system CPU.
// All fields are filled in by detect.
type cpuInfo struct {
	brandname      string // Brand name reported by the CPU
	vendorid       vendor // Comparable CPU vendor ID
	features       flags  // Features of the CPU
	physicalcores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	threadspercore int    // Number of threads per physical core. Will be 1 if undetectable.
	logicalcores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	family         int    // CPU family number
	model          int    // CPU model number
	cacheline      int    // Cache line size in bytes. Will be 0 if undetectable.
	cache          struct {
		l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
		l2  int // L2 Cache (per core or shared). Will be -1 if undetected
		l3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	sgx       sgxsupport // SGX details as reported by hasSGX
	maxFunc   uint32     // Value of maxFunctionID() (EAX of CPUID function 0) at detection time
	maxExFunc uint32     // Value of maxExtendedFunction() (EAX of CPUID function 0x80000000) at detection time
}
161
// Low-level instruction hooks. They are declared without a value here and must
// be assigned before detect runs (presumably by initCPU, which is defined
// outside this file section — confirm).

// cpuid queries CPUID function op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex is like cpuid but additionally takes a sub-function index op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index; support()
// uses index 0 to check which register states the OS has enabled.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm backs rtcounter and ia32tscaux: eax/edx are used as the low/high
// halves of the time-stamp counter and ecx as the IA32_TSC_AUX value.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
166
// cpu contains information about the CPU as detected on startup,
// or when detect last was called.
//
// Use this as the primary entry point to your data; its fields are filled in
// once by detect, so reading them does not re-query the processor.
var cpu cpuInfo
173
// init prepares the package at load time: it calls initCPU (defined outside
// this file section; presumably it installs the cpuid/xgetbv hooks — confirm)
// and then runs detect to populate the package-level cpu variable.
func init() {
	initCPU()
	detect()
}
178
// detect will re-detect current CPU info.
// This will replace the content of the package-level cpu variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// cpu variable.
func detect() {
	cpu.maxFunc = maxFunctionID()
	cpu.maxExFunc = maxExtendedFunction()
	cpu.brandname = brandName()
	cpu.cacheline = cacheLine()
	cpu.family, cpu.model = familyModel()
	cpu.features = support()
	// Must come after cpu.features is set: SGX details are only queried when
	// the sgx feature bit was detected.
	cpu.sgx = hasSGX(cpu.features&sgx != 0)
	cpu.threadspercore = threadsPerCore()
	cpu.logicalcores = logicalCores()
	cpu.physicalcores = physicalCores()
	cpu.vendorid = vendorID()
	cpu.cacheSize()
}
200
201// Generated here: http://play.golang.org/p/BxFH2Gdc0G
202
203// Cmov indicates support of CMOV instructions
204func (c cpuInfo) cmov() bool {
205	return c.features&cmov != 0
206}
207
208// Amd3dnow indicates support of AMD 3DNOW! instructions
209func (c cpuInfo) amd3dnow() bool {
210	return c.features&amd3dnow != 0
211}
212
213// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
214func (c cpuInfo) amd3dnowext() bool {
215	return c.features&amd3dnowext != 0
216}
217
218// MMX indicates support of MMX instructions
219func (c cpuInfo) mmx() bool {
220	return c.features&mmx != 0
221}
222
223// MMXExt indicates support of MMXEXT instructions
224// (SSE integer functions or AMD MMX ext)
225func (c cpuInfo) mmxext() bool {
226	return c.features&mmxext != 0
227}
228
229// SSE indicates support of SSE instructions
230func (c cpuInfo) sse() bool {
231	return c.features&sse != 0
232}
233
234// SSE2 indicates support of SSE 2 instructions
235func (c cpuInfo) sse2() bool {
236	return c.features&sse2 != 0
237}
238
239// SSE3 indicates support of SSE 3 instructions
240func (c cpuInfo) sse3() bool {
241	return c.features&sse3 != 0
242}
243
244// SSSE3 indicates support of SSSE 3 instructions
245func (c cpuInfo) ssse3() bool {
246	return c.features&ssse3 != 0
247}
248
249// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
250func (c cpuInfo) sse4() bool {
251	return c.features&sse4 != 0
252}
253
254// SSE42 indicates support of SSE4.2 instructions
255func (c cpuInfo) sse42() bool {
256	return c.features&sse42 != 0
257}
258
259// AVX indicates support of AVX instructions
260// and operating system support of AVX instructions
261func (c cpuInfo) avx() bool {
262	return c.features&avx != 0
263}
264
265// AVX2 indicates support of AVX2 instructions
266func (c cpuInfo) avx2() bool {
267	return c.features&avx2 != 0
268}
269
270// FMA3 indicates support of FMA3 instructions
271func (c cpuInfo) fma3() bool {
272	return c.features&fma3 != 0
273}
274
275// FMA4 indicates support of FMA4 instructions
276func (c cpuInfo) fma4() bool {
277	return c.features&fma4 != 0
278}
279
280// XOP indicates support of XOP instructions
281func (c cpuInfo) xop() bool {
282	return c.features&xop != 0
283}
284
285// F16C indicates support of F16C instructions
286func (c cpuInfo) f16c() bool {
287	return c.features&f16c != 0
288}
289
290// BMI1 indicates support of BMI1 instructions
291func (c cpuInfo) bmi1() bool {
292	return c.features&bmi1 != 0
293}
294
295// BMI2 indicates support of BMI2 instructions
296func (c cpuInfo) bmi2() bool {
297	return c.features&bmi2 != 0
298}
299
300// TBM indicates support of TBM instructions
301// (AMD Trailing Bit Manipulation)
302func (c cpuInfo) tbm() bool {
303	return c.features&tbm != 0
304}
305
306// Lzcnt indicates support of LZCNT instruction
307func (c cpuInfo) lzcnt() bool {
308	return c.features&lzcnt != 0
309}
310
311// Popcnt indicates support of POPCNT instruction
312func (c cpuInfo) popcnt() bool {
313	return c.features&popcnt != 0
314}
315
316// HTT indicates the processor has Hyperthreading enabled
317func (c cpuInfo) htt() bool {
318	return c.features&htt != 0
319}
320
321// SSE2Slow indicates that SSE2 may be slow on this processor
322func (c cpuInfo) sse2slow() bool {
323	return c.features&sse2slow != 0
324}
325
326// SSE3Slow indicates that SSE3 may be slow on this processor
327func (c cpuInfo) sse3slow() bool {
328	return c.features&sse3slow != 0
329}
330
331// AesNi indicates support of AES-NI instructions
332// (Advanced Encryption Standard New Instructions)
333func (c cpuInfo) aesni() bool {
334	return c.features&aesni != 0
335}
336
337// Clmul indicates support of CLMUL instructions
338// (Carry-less Multiplication)
339func (c cpuInfo) clmul() bool {
340	return c.features&clmul != 0
341}
342
343// NX indicates support of NX (No-Execute) bit
344func (c cpuInfo) nx() bool {
345	return c.features&nx != 0
346}
347
348// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
349func (c cpuInfo) sse4a() bool {
350	return c.features&sse4a != 0
351}
352
353// HLE indicates support of Hardware Lock Elision
354func (c cpuInfo) hle() bool {
355	return c.features&hle != 0
356}
357
358// RTM indicates support of Restricted Transactional Memory
359func (c cpuInfo) rtm() bool {
360	return c.features&rtm != 0
361}
362
363// Rdrand indicates support of RDRAND instruction is available
364func (c cpuInfo) rdrand() bool {
365	return c.features&rdrand != 0
366}
367
368// Rdseed indicates support of RDSEED instruction is available
369func (c cpuInfo) rdseed() bool {
370	return c.features&rdseed != 0
371}
372
373// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
374func (c cpuInfo) adx() bool {
375	return c.features&adx != 0
376}
377
378// SHA indicates support of Intel SHA Extensions
379func (c cpuInfo) sha() bool {
380	return c.features&sha != 0
381}
382
383// AVX512F indicates support of AVX-512 Foundation
384func (c cpuInfo) avx512f() bool {
385	return c.features&avx512f != 0
386}
387
388// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
389func (c cpuInfo) avx512dq() bool {
390	return c.features&avx512dq != 0
391}
392
393// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
394func (c cpuInfo) avx512ifma() bool {
395	return c.features&avx512ifma != 0
396}
397
398// AVX512PF indicates support of AVX-512 Prefetch Instructions
399func (c cpuInfo) avx512pf() bool {
400	return c.features&avx512pf != 0
401}
402
403// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
404func (c cpuInfo) avx512er() bool {
405	return c.features&avx512er != 0
406}
407
408// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
409func (c cpuInfo) avx512cd() bool {
410	return c.features&avx512cd != 0
411}
412
413// AVX512BW indicates support of AVX-512 Byte and Word Instructions
414func (c cpuInfo) avx512bw() bool {
415	return c.features&avx512bw != 0
416}
417
418// AVX512VL indicates support of AVX-512 Vector Length Extensions
419func (c cpuInfo) avx512vl() bool {
420	return c.features&avx512vl != 0
421}
422
423// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
424func (c cpuInfo) avx512vbmi() bool {
425	return c.features&avx512vbmi != 0
426}
427
428// MPX indicates support of Intel MPX (Memory Protection Extensions)
429func (c cpuInfo) mpx() bool {
430	return c.features&mpx != 0
431}
432
433// ERMS indicates support of Enhanced REP MOVSB/STOSB
434func (c cpuInfo) erms() bool {
435	return c.features&erms != 0
436}
437
438// RDTSCP Instruction is available.
439func (c cpuInfo) rdtscp() bool {
440	return c.features&rdtscp != 0
441}
442
443// CX16 indicates if CMPXCHG16B instruction is available.
444func (c cpuInfo) cx16() bool {
445	return c.features&cx16 != 0
446}
447
448// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
449// So TSX simply checks that.
450func (c cpuInfo) tsx() bool {
451	return c.features&(mpx|rtm) == mpx|rtm
452}
453
454// Atom indicates an Atom processor
455func (c cpuInfo) atom() bool {
456	return c.features&atom != 0
457}
458
459// Intel returns true if vendor is recognized as Intel
460func (c cpuInfo) intel() bool {
461	return c.vendorid == intel
462}
463
464// AMD returns true if vendor is recognized as AMD
465func (c cpuInfo) amd() bool {
466	return c.vendorid == amd
467}
468
469// Transmeta returns true if vendor is recognized as Transmeta
470func (c cpuInfo) transmeta() bool {
471	return c.vendorid == transmeta
472}
473
474// NSC returns true if vendor is recognized as National Semiconductor
475func (c cpuInfo) nsc() bool {
476	return c.vendorid == nsc
477}
478
479// VIA returns true if vendor is recognized as VIA
480func (c cpuInfo) via() bool {
481	return c.vendorid == via
482}
483
484// RTCounter returns the 64-bit time-stamp counter
485// Uses the RDTSCP instruction. The value 0 is returned
486// if the CPU does not support the instruction.
487func (c cpuInfo) rtcounter() uint64 {
488	if !c.rdtscp() {
489		return 0
490	}
491	a, _, _, d := rdtscpAsm()
492	return uint64(a) | (uint64(d) << 32)
493}
494
495// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
496// This variable is OS dependent, but on Linux contains information
497// about the current cpu/core the code is running on.
498// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
499func (c cpuInfo) ia32tscaux() uint32 {
500	if !c.rdtscp() {
501		return 0
502	}
503	_, _, ecx, _ := rdtscpAsm()
504	return ecx
505}
506
507// LogicalCPU will return the Logical CPU the code is currently executing on.
508// This is likely to change when the OS re-schedules the running thread
509// to another CPU.
510// If the current core cannot be detected, -1 will be returned.
511func (c cpuInfo) logicalcpu() int {
512	if c.maxFunc < 1 {
513		return -1
514	}
515	_, ebx, _, _ := cpuid(1)
516	return int(ebx >> 24)
517}
518
519// VM Will return true if the cpu id indicates we are in
520// a virtual machine. This is only a hint, and will very likely
521// have many false negatives.
522func (c cpuInfo) vm() bool {
523	switch c.vendorid {
524	case msvm, kvm, vmware, xenhvm:
525		return true
526	}
527	return false
528}
529
// flags contains detected CPU features and characteristics as a bit set;
// each bit corresponds to one of the feature constants (cmov, nx, ...).
type flags uint64
532
533// String returns a string representation of the detected
534// CPU features.
535func (f flags) String() string {
536	return strings.Join(f.strings(), ",")
537}
538
539// Strings returns and array of the detected features.
540func (f flags) strings() []string {
541	s := support()
542	r := make([]string, 0, 20)
543	for i := uint(0); i < 64; i++ {
544		key := flags(1 << i)
545		val := flagNames[key]
546		if s&key != 0 {
547			r = append(r, val)
548		}
549	}
550	return r
551}
552
553func maxExtendedFunction() uint32 {
554	eax, _, _, _ := cpuid(0x80000000)
555	return eax
556}
557
558func maxFunctionID() uint32 {
559	a, _, _, _ := cpuid(0)
560	return a
561}
562
563func brandName() string {
564	if maxExtendedFunction() >= 0x80000004 {
565		v := make([]uint32, 0, 48)
566		for i := uint32(0); i < 3; i++ {
567			a, b, c, d := cpuid(0x80000002 + i)
568			v = append(v, a, b, c, d)
569		}
570		return strings.Trim(string(valAsString(v...)), " ")
571	}
572	return "unknown"
573}
574
575func threadsPerCore() int {
576	mfi := maxFunctionID()
577	if mfi < 0x4 || vendorID() != intel {
578		return 1
579	}
580
581	if mfi < 0xb {
582		_, b, _, d := cpuid(1)
583		if (d & (1 << 28)) != 0 {
584			// v will contain logical core count
585			v := (b >> 16) & 255
586			if v > 1 {
587				a4, _, _, _ := cpuid(4)
588				// physical cores
589				v2 := (a4 >> 26) + 1
590				if v2 > 0 {
591					return int(v) / int(v2)
592				}
593			}
594		}
595		return 1
596	}
597	_, b, _, _ := cpuidex(0xb, 0)
598	if b&0xffff == 0 {
599		return 1
600	}
601	return int(b & 0xffff)
602}
603
604func logicalCores() int {
605	mfi := maxFunctionID()
606	switch vendorID() {
607	case intel:
608		// Use this on old Intel processors
609		if mfi < 0xb {
610			if mfi < 1 {
611				return 0
612			}
613			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
614			// that can be assigned to logical processors in a physical package.
615			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
616			_, ebx, _, _ := cpuid(1)
617			logical := (ebx >> 16) & 0xff
618			return int(logical)
619		}
620		_, b, _, _ := cpuidex(0xb, 1)
621		return int(b & 0xffff)
622	case amd:
623		_, b, _, _ := cpuid(1)
624		return int((b >> 16) & 0xff)
625	default:
626		return 0
627	}
628}
629
630func familyModel() (int, int) {
631	if maxFunctionID() < 0x1 {
632		return 0, 0
633	}
634	eax, _, _, _ := cpuid(1)
635	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
636	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
637	return int(family), int(model)
638}
639
640func physicalCores() int {
641	switch vendorID() {
642	case intel:
643		return logicalCores() / threadsPerCore()
644	case amd:
645		if maxExtendedFunction() >= 0x80000008 {
646			_, _, c, _ := cpuid(0x80000008)
647			return int(c&0xff) + 1
648		}
649	}
650	return 0
651}
652
// vendorMapping maps the 12-byte CPUID vendor string to a vendor value.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]vendor{
	"AMDisbetter!": amd,
	"AuthenticAMD": amd,
	"CentaurHauls": via,
	"GenuineIntel": intel,
	"TransmetaCPU": transmeta,
	"GenuineTMx86": transmeta,
	"Geode by NSC": nsc,
	"VIA VIA VIA ": via,
	"KVMKVMKVMKVM": kvm,
	"Microsoft Hv": msvm,
	"VMwareVMware": vmware,
	"XenVMMXenVMM": xenhvm,
}
668
669func vendorID() vendor {
670	_, b, c, d := cpuid(0)
671	v := valAsString(b, d, c)
672	vend, ok := vendorMapping[string(v)]
673	if !ok {
674		return other
675	}
676	return vend
677}
678
679func cacheLine() int {
680	if maxFunctionID() < 0x1 {
681		return 0
682	}
683
684	_, ebx, _, _ := cpuid(1)
685	cache := (ebx & 0xff00) >> 5 // cflush size
686	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
687		_, _, ecx, _ := cpuid(0x80000006)
688		cache = ecx & 0xff // cacheline size
689	}
690	// TODO: Read from Cache and TLB Information
691	return int(cache)
692}
693
694func (c *cpuInfo) cacheSize() {
695	c.cache.l1d = -1
696	c.cache.l1i = -1
697	c.cache.l2 = -1
698	c.cache.l3 = -1
699	vendor := vendorID()
700	switch vendor {
701	case intel:
702		if maxFunctionID() < 4 {
703			return
704		}
705		for i := uint32(0); ; i++ {
706			eax, ebx, ecx, _ := cpuidex(4, i)
707			cacheType := eax & 15
708			if cacheType == 0 {
709				break
710			}
711			cacheLevel := (eax >> 5) & 7
712			coherency := int(ebx&0xfff) + 1
713			partitions := int((ebx>>12)&0x3ff) + 1
714			associativity := int((ebx>>22)&0x3ff) + 1
715			sets := int(ecx) + 1
716			size := associativity * partitions * coherency * sets
717			switch cacheLevel {
718			case 1:
719				if cacheType == 1 {
720					// 1 = Data Cache
721					c.cache.l1d = size
722				} else if cacheType == 2 {
723					// 2 = Instruction Cache
724					c.cache.l1i = size
725				} else {
726					if c.cache.l1d < 0 {
727						c.cache.l1i = size
728					}
729					if c.cache.l1i < 0 {
730						c.cache.l1i = size
731					}
732				}
733			case 2:
734				c.cache.l2 = size
735			case 3:
736				c.cache.l3 = size
737			}
738		}
739	case amd:
740		// Untested.
741		if maxExtendedFunction() < 0x80000005 {
742			return
743		}
744		_, _, ecx, edx := cpuid(0x80000005)
745		c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
746		c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
747
748		if maxExtendedFunction() < 0x80000006 {
749			return
750		}
751		_, _, ecx, _ = cpuid(0x80000006)
752		c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
753	}
754
755	return
756}
757
// sgxsupport describes the Software Guard Extensions capabilities as
// collected by hasSGX. When available is false all other fields are zero.
type sgxsupport struct {
	available           bool  // the sgx feature bit was detected
	sgx1supported       bool  // bit 0 of EAX from CPUID function 0x12, sub-function 0
	sgx2supported       bool  // bit 1 of EAX from CPUID function 0x12, sub-function 0
	maxenclavesizenot64 int64 // 2^(EDX[7:0]); presumably the limit outside 64-bit mode — confirm
	maxenclavesize64    int64 // 2^(EDX[15:8]); presumably the limit in 64-bit mode — confirm
}
765
766func hasSGX(available bool) (rval sgxsupport) {
767	rval.available = available
768
769	if !available {
770		return
771	}
772
773	a, _, _, d := cpuidex(0x12, 0)
774	rval.sgx1supported = a&0x01 != 0
775	rval.sgx2supported = a&0x02 != 0
776	rval.maxenclavesizenot64 = 1 << (d & 0xFF)     // pow 2
777	rval.maxenclavesize64 = 1 << ((d >> 8) & 0xFF) // pow 2
778
779	return
780}
781
// support queries CPUID (and XGETBV where needed) and returns the set of
// detected feature bits. It returns 0 when CPUID function 1 is unavailable.
//
// The result is assembled in four steps:
//  1. basic features from CPUID function 1 (ECX/EDX),
//  2. AVX/FMA3, gated on the OS having enabled the XMM/YMM state,
//  3. structured extended features from function 7 (incl. AVX-512),
//  4. extended features from function 0x80000001 plus the
//     performance-indicator flags (sse2slow, sse3slow, atom).
func support() flags {
	mfi := maxFunctionID()
	vend := vendorID()
	if mfi < 0x1 {
		return 0
	}
	rval := uint64(0)
	_, _, c, d := cpuid(1)
	if (d & (1 << 15)) != 0 {
		rval |= cmov
	}
	if (d & (1 << 23)) != 0 {
		rval |= mmx
	}
	// The SSE bit (EDX bit 25) also implies the MMX extensions.
	if (d & (1 << 25)) != 0 {
		rval |= mmxext
	}
	if (d & (1 << 25)) != 0 {
		rval |= sse
	}
	if (d & (1 << 26)) != 0 {
		rval |= sse2
	}
	if (c & 1) != 0 {
		rval |= sse3
	}
	if (c & 0x00000200) != 0 {
		rval |= ssse3
	}
	if (c & 0x00080000) != 0 {
		rval |= sse4
	}
	if (c & 0x00100000) != 0 {
		rval |= sse42
	}
	if (c & (1 << 25)) != 0 {
		rval |= aesni
	}
	if (c & (1 << 1)) != 0 {
		rval |= clmul
	}
	if c&(1<<23) != 0 {
		rval |= popcnt
	}
	if c&(1<<30) != 0 {
		rval |= rdrand
	}
	if c&(1<<29) != 0 {
		rval |= f16c
	}
	if c&(1<<13) != 0 {
		rval |= cx16
	}
	// HTT is only reported on Intel, and only when more than one thread per
	// core is actually detected.
	if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
		if threadsPerCore() > 1 {
			rval |= htt
		}
	}

	// Check XGETBV, OXSAVE and AVX bits
	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
		// Check for OS support
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			rval |= avx
			if (c & 0x00001000) != 0 {
				rval |= fma3
			}
		}
	}

	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		_, ebx, ecx, edx := cpuidex(7, 0)
		if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
			rval |= avx2
		}
		// BMI2 is only reported together with BMI1.
		if (ebx & 0x00000008) != 0 {
			rval |= bmi1
			if (ebx & 0x00000100) != 0 {
				rval |= bmi2
			}
		}
		if ebx&(1<<2) != 0 {
			rval |= sgx
		}
		if ebx&(1<<4) != 0 {
			rval |= hle
		}
		if ebx&(1<<9) != 0 {
			rval |= erms
		}
		if ebx&(1<<11) != 0 {
			rval |= rtm
		}
		if ebx&(1<<14) != 0 {
			rval |= mpx
		}
		if ebx&(1<<18) != 0 {
			rval |= rdseed
		}
		if ebx&(1<<19) != 0 {
			rval |= adx
		}
		if ebx&(1<<29) != 0 {
			rval |= sha
		}
		if edx&(1<<26) != 0 {
			rval |= ibpb
		}
		if edx&(1<<27) != 0 {
			rval |= stibp
		}

		// Only detect AVX-512 features if XGETBV is supported
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)

			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
				if ebx&(1<<16) != 0 {
					rval |= avx512f
				}
				if ebx&(1<<17) != 0 {
					rval |= avx512dq
				}
				if ebx&(1<<21) != 0 {
					rval |= avx512ifma
				}
				if ebx&(1<<26) != 0 {
					rval |= avx512pf
				}
				if ebx&(1<<27) != 0 {
					rval |= avx512er
				}
				if ebx&(1<<28) != 0 {
					rval |= avx512cd
				}
				if ebx&(1<<30) != 0 {
					rval |= avx512bw
				}
				if ebx&(1<<31) != 0 {
					rval |= avx512vl
				}
				// ecx
				if ecx&(1<<1) != 0 {
					rval |= avx512vbmi
				}
			}
		}
	}

	if maxExtendedFunction() >= 0x80000001 {
		// Note: c and d are shadowed here by the extended-function registers.
		_, _, c, d := cpuid(0x80000001)
		// The LZCNT bit also sets POPCNT.
		if (c & (1 << 5)) != 0 {
			rval |= lzcnt
			rval |= popcnt
		}
		if (d & (1 << 31)) != 0 {
			rval |= amd3dnow
		}
		if (d & (1 << 30)) != 0 {
			rval |= amd3dnowext
		}
		if (d & (1 << 23)) != 0 {
			rval |= mmx
		}
		if (d & (1 << 22)) != 0 {
			rval |= mmxext
		}
		if (c & (1 << 6)) != 0 {
			rval |= sse4a
		}
		if d&(1<<20) != 0 {
			rval |= nx
		}
		if d&(1<<27) != 0 {
			rval |= rdtscp
		}

		/* Allow for selectively disabling SSE2 functions on AMD processors
		   with SSE2 support but not SSE4a. This includes Athlon64, some
		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
		   than SSE2 often enough to utilize this special-case flag.
		   The sse2 and sse2slow flags are both set in this case so that SSE2
		   is used unless explicitly disabled by checking sse2slow. */
		if vendorID() != intel &&
			rval&sse2 != 0 && (c&0x00000040) == 0 {
			rval |= sse2slow
		}

		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if (rval & avx) != 0 {
			if (c & 0x00000800) != 0 {
				rval |= xop
			}
			if (c & 0x00010000) != 0 {
				rval |= fma4
			}
		}

		if vendorID() == intel {
			family, model := familyModel()
			if family == 6 && (model == 9 || model == 13 || model == 14) {
				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
				 * usually slower than mmx. */
				if (rval & sse2) != 0 {
					rval |= sse2slow
				}
				if (rval & sse3) != 0 {
					rval |= sse3slow
				}
			}
			/* The Atom processor has SSSE3 support, which is useful in many cases,
			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
			 * on the Atom, but is generally faster on other processors supporting
			 * SSSE3. This flag allows for selectively disabling certain SSSE3
			 * functions on the Atom. */
			if family == 6 && model == 28 {
				rval |= atom
			}
		}
	}
	return flags(rval)
}
1013
// valAsString unpacks each 32-bit value into its four bytes, least
// significant byte first, and returns them concatenated in argument order.
// The result is cut off at (and excludes) the first zero byte encountered.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
1035