1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 are supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import (
14	"math"
15	"strings"
16)
17
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
19// and Processor Programming Reference (PPR)
20
// Vendor is a representation of a CPU vendor.
// It is a small integer enumeration; the known values are declared in the
// constant block that follows, with Other as the zero value.
type Vendor int
23
// Known CPU and hypervisor vendors.
// Other is the zero value and is what vendorID reports when the raw vendor
// string is not present in vendorMapping.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC
)
40
// Feature flags for x86 / x64 CPUs.
// Each constant is an untyped value occupying a single bit (1 << iota); the
// CPUInfo accessor methods test these bits against CPUInfo.Features.
const (
	CMOV               = 1 << iota // i686 CMOV
	NX                             // NX (No-Execute) bit
	AMD3DNOW                       // AMD 3DNOW
	AMD3DNOWEXT                    // AMD 3DNowExt
	MMX                            // standard MMX
	MMXEXT                         // SSE integer functions or AMD MMX ext
	SSE                            // SSE functions
	SSE2                           // P4 SSE2 functions
	SSE3                           // Prescott SSE3 functions
	SSSE3                          // Conroe SSSE3 functions
	SSE4                           // Penryn SSE4.1 functions
	SSE4A                          // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                          // Nehalem SSE4.2 functions
	AVX                            // AVX functions
	AVX2                           // AVX2 functions
	FMA3                           // Intel FMA 3
	FMA4                           // Bulldozer FMA4 functions
	XOP                            // Bulldozer XOP functions
	F16C                           // Half-precision floating-point conversion
	BMI1                           // Bit Manipulation Instruction Set 1
	BMI2                           // Bit Manipulation Instruction Set 2
	TBM                            // AMD Trailing Bit Manipulation
	LZCNT                          // LZCNT instruction
	POPCNT                         // POPCNT instruction
	AESNI                          // Advanced Encryption Standard New Instructions
	CLMUL                          // Carry-less Multiplication
	HTT                            // Hyperthreading (enabled)
	HLE                            // Hardware Lock Elision
	RTM                            // Restricted Transactional Memory
	RDRAND                         // RDRAND instruction is available
	RDSEED                         // RDSEED instruction is available
	ADX                            // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                            // Intel SHA Extensions
	AVX512F                        // AVX-512 Foundation
	AVX512DQ                       // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA                     // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                       // AVX-512 Prefetch Instructions
	AVX512ER                       // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                       // AVX-512 Conflict Detection Instructions
	AVX512BW                       // AVX-512 Byte and Word Instructions
	AVX512VL                       // AVX-512 Vector Length Extensions
	AVX512VBMI                     // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                    // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VNNI                     // AVX-512 Vector Neural Network Instructions
	AVX512VPOPCNTDQ                // AVX-512 Vector Population Count Doubleword and Quadword
	GFNI                           // Galois Field New Instructions
	VAES                           // Vector AES
	AVX512BITALG                   // AVX-512 Bit Algorithms
	VPCLMULQDQ                     // Carry-Less Multiplication Quadword
	AVX512BF16                     // AVX-512 BFLOAT16 Instructions
	AVX512VP2INTERSECT             // AVX-512 Intersect for D/Q
	MPX                            // Intel MPX (Memory Protection Extensions)
	ERMS                           // Enhanced REP MOVSB/STOSB
	RDTSCP                         // RDTSCP Instruction
	CX16                           // CMPXCHG16B Instruction
	SGX                            // Software Guard Extensions
	SGXLC                          // Software Guard Extensions Launch Control
	IBPB                           // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP                          // Single Thread Indirect Branch Predictors
	VMX                            // Virtual Machine Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
108
// flagNames maps each x86 feature flag bit to its printable name.
// Flags.Strings looks names up here, one bit at a time.
var flagNames = map[Flags]string{
	CMOV:               "CMOV",               // i686 CMOV
	NX:                 "NX",                 // NX (No-Execute) bit
	AMD3DNOW:           "AMD3DNOW",           // AMD 3DNOW
	AMD3DNOWEXT:        "AMD3DNOWEXT",        // AMD 3DNowExt
	MMX:                "MMX",                // Standard MMX
	MMXEXT:             "MMXEXT",             // SSE integer functions or AMD MMX ext
	SSE:                "SSE",                // SSE functions
	SSE2:               "SSE2",               // P4 SSE2 functions
	SSE3:               "SSE3",               // Prescott SSE3 functions
	SSSE3:              "SSSE3",              // Conroe SSSE3 functions
	SSE4:               "SSE4.1",             // Penryn SSE4.1 functions
	SSE4A:              "SSE4A",              // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:              "SSE4.2",             // Nehalem SSE4.2 functions
	AVX:                "AVX",                // AVX functions
	AVX2:               "AVX2",               // AVX2 functions
	FMA3:               "FMA3",               // Intel FMA 3
	FMA4:               "FMA4",               // Bulldozer FMA4 functions
	XOP:                "XOP",                // Bulldozer XOP functions
	F16C:               "F16C",               // Half-precision floating-point conversion
	BMI1:               "BMI1",               // Bit Manipulation Instruction Set 1
	BMI2:               "BMI2",               // Bit Manipulation Instruction Set 2
	TBM:                "TBM",                // AMD Trailing Bit Manipulation
	LZCNT:              "LZCNT",              // LZCNT instruction
	POPCNT:             "POPCNT",             // POPCNT instruction
	AESNI:              "AESNI",              // Advanced Encryption Standard New Instructions
	CLMUL:              "CLMUL",              // Carry-less Multiplication
	HTT:                "HTT",                // Hyperthreading (enabled)
	HLE:                "HLE",                // Hardware Lock Elision
	RTM:                "RTM",                // Restricted Transactional Memory
	RDRAND:             "RDRAND",             // RDRAND instruction is available
	RDSEED:             "RDSEED",             // RDSEED instruction is available
	ADX:                "ADX",                // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:                "SHA",                // Intel SHA Extensions
	AVX512F:            "AVX512F",            // AVX-512 Foundation
	AVX512DQ:           "AVX512DQ",           // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:         "AVX512IFMA",         // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:           "AVX512PF",           // AVX-512 Prefetch Instructions
	AVX512ER:           "AVX512ER",           // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:           "AVX512CD",           // AVX-512 Conflict Detection Instructions
	AVX512BW:           "AVX512BW",           // AVX-512 Byte and Word Instructions
	AVX512VL:           "AVX512VL",           // AVX-512 Vector Length Extensions
	AVX512VBMI:         "AVX512VBMI",         // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2:        "AVX512VBMI2",        // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VNNI:         "AVX512VNNI",         // AVX-512 Vector Neural Network Instructions
	AVX512VPOPCNTDQ:    "AVX512VPOPCNTDQ",    // AVX-512 Vector Population Count Doubleword and Quadword
	GFNI:               "GFNI",               // Galois Field New Instructions
	VAES:               "VAES",               // Vector AES
	AVX512BITALG:       "AVX512BITALG",       // AVX-512 Bit Algorithms
	VPCLMULQDQ:         "VPCLMULQDQ",         // Carry-Less Multiplication Quadword
	AVX512BF16:         "AVX512BF16",         // AVX-512 BFLOAT16 Instructions
	AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
	MPX:                "MPX",                // Intel MPX (Memory Protection Extensions)
	ERMS:               "ERMS",               // Enhanced REP MOVSB/STOSB
	RDTSCP:             "RDTSCP",             // RDTSCP Instruction
	CX16:               "CX16",               // CMPXCHG16B Instruction
	SGX:                "SGX",                // Software Guard Extensions
	SGXLC:              "SGXLC",              // Software Guard Extensions Launch Control
	IBPB:               "IBPB",               // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	STIBP:              "STIBP",              // Single Thread Indirect Branch Predictors
	VMX:                "VMX",                // Virtual Machine Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
177
/*
 * Feature flags for arm64 CPUs. Each constant is a single bit of an ArmFlags
 * value. All special features for arm64 should be defined here.
 */
const (
	/* extension instructions */
	FP ArmFlags = 1 << iota
	ASIMD
	EVTSTRM
	AES
	PMULL
	SHA1
	SHA2
	CRC32
	ATOMICS
	FPHP
	ASIMDHP
	ARMCPUID
	ASIMDRDM
	JSCVT
	FCMA
	LRCPC
	DCPOP
	SHA3
	SM3
	SM4
	ASIMDDP
	SHA512
	SVE
	GPA
)
206
// flagNamesArm maps each arm64 feature flag bit to its printable name.
// ArmFlags.Strings looks names up here, one bit at a time.
// Note that ARMCPUID is printed as "CPUID".
var flagNamesArm = map[ArmFlags]string{
	FP:       "FP",       // Single-precision and double-precision floating point
	ASIMD:    "ASIMD",    // Advanced SIMD
	EVTSTRM:  "EVTSTRM",  // Generic timer
	AES:      "AES",      // AES instructions
	PMULL:    "PMULL",    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1:     "SHA1",     // SHA-1 instructions (SHA1C, etc)
	SHA2:     "SHA2",     // SHA-2 instructions (SHA256H, etc)
	CRC32:    "CRC32",    // CRC32/CRC32C instructions
	ATOMICS:  "ATOMICS",  // Large System Extensions (LSE)
	FPHP:     "FPHP",     // Half-precision floating point
	ASIMDHP:  "ASIMDHP",  // Advanced SIMD half-precision floating point
	ARMCPUID: "CPUID",    // Some CPU ID registers readable at user-level
	ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	JSCVT:    "JSCVT",    // Javascript-style double->int convert (FJCVTZS)
	FCMA:     "FCMA",     // Floating point complex number addition and multiplication
	LRCPC:    "LRCPC",    // Weaker release consistency (LDAPR, etc)
	DCPOP:    "DCPOP",    // Data cache clean to Point of Persistence (DC CVAP)
	SHA3:     "SHA3",     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SM3:      "SM3",      // SM3 instructions
	SM4:      "SM4",      // SM4 instructions
	ASIMDDP:  "ASIMDDP",  // SIMD Dot Product
	SHA512:   "SHA512",   // SHA512 instructions
	SVE:      "SVE",      // Scalable Vector Extension
	GPA:      "GPA",      // Generic Pointer Authentication
}
233
// CPUInfo contains information about the detected system CPU.
// The package-level CPU variable holds the instance filled in by Detect.
type CPUInfo struct {
	BrandName      string   // Brand name reported by the CPU
	VendorID       Vendor   // Comparable CPU vendor ID
	VendorString   string   // Raw vendor string.
	Features       Flags    // Features of the CPU (x64)
	Arm            ArmFlags // Features of the CPU (arm)
	PhysicalCores  int      // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int      // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int      // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int      // CPU family number
	Model          int      // CPU model number
	CacheLine      int      // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64    // Clock speed, if known
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details; SGXSupport is declared elsewhere (presumably Software Guard Extensions info - confirm)
	maxFunc   uint32     // LogicalCPU requires this to be >= 1 before issuing cpuid(1); presumably the highest standard CPUID function - confirm
	maxExFunc uint32     // presumably the highest extended CPUID function - confirm against the code that sets it
}
258
// Low-level CPU query hooks used throughout this file.
// They are declared here without a value; presumably initCPU (called from
// init) assigns the architecture-specific implementations - confirm in the
// per-platform files.

// cpuid queries the given function (leaf) and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex is like cpuid but additionally takes a second selector (op2).
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv returns two 32-bit halves for the given index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm backs RTCounter (which combines eax and edx into a 64-bit value)
// and Ia32TscAux (which returns ecx).
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
263
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
269
// init runs once when the package is loaded: it calls initCPU first and then
// Detect, which populates the exported CPU variable.
func init() {
	initCPU()
	Detect()
}
274
275// Detect will re-detect current CPU info.
276// This will replace the content of the exported CPU variable.
277//
278// Unless you expect the CPU to change while you are running your program
279// you should not need to call this function.
280// If you call this, you must ensure that no other goroutine is accessing the
281// exported CPU variable.
282func Detect() {
283	// Set defaults
284	CPU.ThreadsPerCore = 1
285	CPU.Cache.L1I = -1
286	CPU.Cache.L1D = -1
287	CPU.Cache.L2 = -1
288	CPU.Cache.L3 = -1
289	addInfo(&CPU)
290}
291
292// Generated here: http://play.golang.org/p/BxFH2Gdc0G
293
294// Cmov indicates support of CMOV instructions
295func (c CPUInfo) Cmov() bool {
296	return c.Features&CMOV != 0
297}
298
299// Amd3dnow indicates support of AMD 3DNOW! instructions
300func (c CPUInfo) Amd3dnow() bool {
301	return c.Features&AMD3DNOW != 0
302}
303
304// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
305func (c CPUInfo) Amd3dnowExt() bool {
306	return c.Features&AMD3DNOWEXT != 0
307}
308
309// VMX indicates support of VMX
310func (c CPUInfo) VMX() bool {
311	return c.Features&VMX != 0
312}
313
314// MMX indicates support of MMX instructions
315func (c CPUInfo) MMX() bool {
316	return c.Features&MMX != 0
317}
318
319// MMXExt indicates support of MMXEXT instructions
320// (SSE integer functions or AMD MMX ext)
321func (c CPUInfo) MMXExt() bool {
322	return c.Features&MMXEXT != 0
323}
324
325// SSE indicates support of SSE instructions
326func (c CPUInfo) SSE() bool {
327	return c.Features&SSE != 0
328}
329
330// SSE2 indicates support of SSE 2 instructions
331func (c CPUInfo) SSE2() bool {
332	return c.Features&SSE2 != 0
333}
334
335// SSE3 indicates support of SSE 3 instructions
336func (c CPUInfo) SSE3() bool {
337	return c.Features&SSE3 != 0
338}
339
340// SSSE3 indicates support of SSSE 3 instructions
341func (c CPUInfo) SSSE3() bool {
342	return c.Features&SSSE3 != 0
343}
344
345// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
346func (c CPUInfo) SSE4() bool {
347	return c.Features&SSE4 != 0
348}
349
350// SSE42 indicates support of SSE4.2 instructions
351func (c CPUInfo) SSE42() bool {
352	return c.Features&SSE42 != 0
353}
354
355// AVX indicates support of AVX instructions
356// and operating system support of AVX instructions
357func (c CPUInfo) AVX() bool {
358	return c.Features&AVX != 0
359}
360
361// AVX2 indicates support of AVX2 instructions
362func (c CPUInfo) AVX2() bool {
363	return c.Features&AVX2 != 0
364}
365
366// FMA3 indicates support of FMA3 instructions
367func (c CPUInfo) FMA3() bool {
368	return c.Features&FMA3 != 0
369}
370
371// FMA4 indicates support of FMA4 instructions
372func (c CPUInfo) FMA4() bool {
373	return c.Features&FMA4 != 0
374}
375
376// XOP indicates support of XOP instructions
377func (c CPUInfo) XOP() bool {
378	return c.Features&XOP != 0
379}
380
381// F16C indicates support of F16C instructions
382func (c CPUInfo) F16C() bool {
383	return c.Features&F16C != 0
384}
385
386// BMI1 indicates support of BMI1 instructions
387func (c CPUInfo) BMI1() bool {
388	return c.Features&BMI1 != 0
389}
390
391// BMI2 indicates support of BMI2 instructions
392func (c CPUInfo) BMI2() bool {
393	return c.Features&BMI2 != 0
394}
395
396// TBM indicates support of TBM instructions
397// (AMD Trailing Bit Manipulation)
398func (c CPUInfo) TBM() bool {
399	return c.Features&TBM != 0
400}
401
402// Lzcnt indicates support of LZCNT instruction
403func (c CPUInfo) Lzcnt() bool {
404	return c.Features&LZCNT != 0
405}
406
407// Popcnt indicates support of POPCNT instruction
408func (c CPUInfo) Popcnt() bool {
409	return c.Features&POPCNT != 0
410}
411
412// HTT indicates the processor has Hyperthreading enabled
413func (c CPUInfo) HTT() bool {
414	return c.Features&HTT != 0
415}
416
417// SSE2Slow indicates that SSE2 may be slow on this processor
418func (c CPUInfo) SSE2Slow() bool {
419	return c.Features&SSE2SLOW != 0
420}
421
422// SSE3Slow indicates that SSE3 may be slow on this processor
423func (c CPUInfo) SSE3Slow() bool {
424	return c.Features&SSE3SLOW != 0
425}
426
427// AesNi indicates support of AES-NI instructions
428// (Advanced Encryption Standard New Instructions)
429func (c CPUInfo) AesNi() bool {
430	return c.Features&AESNI != 0
431}
432
433// Clmul indicates support of CLMUL instructions
434// (Carry-less Multiplication)
435func (c CPUInfo) Clmul() bool {
436	return c.Features&CLMUL != 0
437}
438
439// NX indicates support of NX (No-Execute) bit
440func (c CPUInfo) NX() bool {
441	return c.Features&NX != 0
442}
443
444// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
445func (c CPUInfo) SSE4A() bool {
446	return c.Features&SSE4A != 0
447}
448
449// HLE indicates support of Hardware Lock Elision
450func (c CPUInfo) HLE() bool {
451	return c.Features&HLE != 0
452}
453
454// RTM indicates support of Restricted Transactional Memory
455func (c CPUInfo) RTM() bool {
456	return c.Features&RTM != 0
457}
458
459// Rdrand indicates support of RDRAND instruction is available
460func (c CPUInfo) Rdrand() bool {
461	return c.Features&RDRAND != 0
462}
463
464// Rdseed indicates support of RDSEED instruction is available
465func (c CPUInfo) Rdseed() bool {
466	return c.Features&RDSEED != 0
467}
468
469// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
470func (c CPUInfo) ADX() bool {
471	return c.Features&ADX != 0
472}
473
474// SHA indicates support of Intel SHA Extensions
475func (c CPUInfo) SHA() bool {
476	return c.Features&SHA != 0
477}
478
479// AVX512F indicates support of AVX-512 Foundation
480func (c CPUInfo) AVX512F() bool {
481	return c.Features&AVX512F != 0
482}
483
484// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
485func (c CPUInfo) AVX512DQ() bool {
486	return c.Features&AVX512DQ != 0
487}
488
489// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
490func (c CPUInfo) AVX512IFMA() bool {
491	return c.Features&AVX512IFMA != 0
492}
493
494// AVX512PF indicates support of AVX-512 Prefetch Instructions
495func (c CPUInfo) AVX512PF() bool {
496	return c.Features&AVX512PF != 0
497}
498
499// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
500func (c CPUInfo) AVX512ER() bool {
501	return c.Features&AVX512ER != 0
502}
503
504// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
505func (c CPUInfo) AVX512CD() bool {
506	return c.Features&AVX512CD != 0
507}
508
509// AVX512BW indicates support of AVX-512 Byte and Word Instructions
510func (c CPUInfo) AVX512BW() bool {
511	return c.Features&AVX512BW != 0
512}
513
514// AVX512VL indicates support of AVX-512 Vector Length Extensions
515func (c CPUInfo) AVX512VL() bool {
516	return c.Features&AVX512VL != 0
517}
518
519// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
520func (c CPUInfo) AVX512VBMI() bool {
521	return c.Features&AVX512VBMI != 0
522}
523
524// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
525func (c CPUInfo) AVX512VBMI2() bool {
526	return c.Features&AVX512VBMI2 != 0
527}
528
529// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
530func (c CPUInfo) AVX512VNNI() bool {
531	return c.Features&AVX512VNNI != 0
532}
533
534// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
535func (c CPUInfo) AVX512VPOPCNTDQ() bool {
536	return c.Features&AVX512VPOPCNTDQ != 0
537}
538
539// GFNI indicates support of Galois Field New Instructions
540func (c CPUInfo) GFNI() bool {
541	return c.Features&GFNI != 0
542}
543
544// VAES indicates support of Vector AES
545func (c CPUInfo) VAES() bool {
546	return c.Features&VAES != 0
547}
548
549// AVX512BITALG indicates support of AVX-512 Bit Algorithms
550func (c CPUInfo) AVX512BITALG() bool {
551	return c.Features&AVX512BITALG != 0
552}
553
554// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
555func (c CPUInfo) VPCLMULQDQ() bool {
556	return c.Features&VPCLMULQDQ != 0
557}
558
559// AVX512BF16 indicates support of
560func (c CPUInfo) AVX512BF16() bool {
561	return c.Features&AVX512BF16 != 0
562}
563
564// AVX512VP2INTERSECT indicates support of
565func (c CPUInfo) AVX512VP2INTERSECT() bool {
566	return c.Features&AVX512VP2INTERSECT != 0
567}
568
569// MPX indicates support of Intel MPX (Memory Protection Extensions)
570func (c CPUInfo) MPX() bool {
571	return c.Features&MPX != 0
572}
573
574// ERMS indicates support of Enhanced REP MOVSB/STOSB
575func (c CPUInfo) ERMS() bool {
576	return c.Features&ERMS != 0
577}
578
579// RDTSCP Instruction is available.
580func (c CPUInfo) RDTSCP() bool {
581	return c.Features&RDTSCP != 0
582}
583
584// CX16 indicates if CMPXCHG16B instruction is available.
585func (c CPUInfo) CX16() bool {
586	return c.Features&CX16 != 0
587}
588
589// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
590// So TSX simply checks that.
591func (c CPUInfo) TSX() bool {
592	return c.Features&(HLE|RTM) == HLE|RTM
593}
594
595// Atom indicates an Atom processor
596func (c CPUInfo) Atom() bool {
597	return c.Features&ATOM != 0
598}
599
600// Intel returns true if vendor is recognized as Intel
601func (c CPUInfo) Intel() bool {
602	return c.VendorID == Intel
603}
604
605// AMD returns true if vendor is recognized as AMD
606func (c CPUInfo) AMD() bool {
607	return c.VendorID == AMD
608}
609
610// Hygon returns true if vendor is recognized as Hygon
611func (c CPUInfo) Hygon() bool {
612	return c.VendorID == Hygon
613}
614
615// Transmeta returns true if vendor is recognized as Transmeta
616func (c CPUInfo) Transmeta() bool {
617	return c.VendorID == Transmeta
618}
619
620// NSC returns true if vendor is recognized as National Semiconductor
621func (c CPUInfo) NSC() bool {
622	return c.VendorID == NSC
623}
624
625// VIA returns true if vendor is recognized as VIA
626func (c CPUInfo) VIA() bool {
627	return c.VendorID == VIA
628}
629
630// RTCounter returns the 64-bit time-stamp counter
631// Uses the RDTSCP instruction. The value 0 is returned
632// if the CPU does not support the instruction.
633func (c CPUInfo) RTCounter() uint64 {
634	if !c.RDTSCP() {
635		return 0
636	}
637	a, _, _, d := rdtscpAsm()
638	return uint64(a) | (uint64(d) << 32)
639}
640
641// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
642// This variable is OS dependent, but on Linux contains information
643// about the current cpu/core the code is running on.
644// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
645func (c CPUInfo) Ia32TscAux() uint32 {
646	if !c.RDTSCP() {
647		return 0
648	}
649	_, _, ecx, _ := rdtscpAsm()
650	return ecx
651}
652
653// LogicalCPU will return the Logical CPU the code is currently executing on.
654// This is likely to change when the OS re-schedules the running thread
655// to another CPU.
656// If the current core cannot be detected, -1 will be returned.
657func (c CPUInfo) LogicalCPU() int {
658	if c.maxFunc < 1 {
659		return -1
660	}
661	_, ebx, _, _ := cpuid(1)
662	return int(ebx >> 24)
663}
664
665// hertz tries to compute the clock speed of the CPU. If leaf 15 is
666// supported, use it, otherwise parse the brand string. Yes, really.
667func hertz(model string) int64 {
668	mfi := maxFunctionID()
669	if mfi >= 0x15 {
670		eax, ebx, ecx, _ := cpuid(0x15)
671		if eax != 0 && ebx != 0 && ecx != 0 {
672			return int64((int64(ecx) * int64(ebx)) / int64(eax))
673		}
674	}
675	// computeHz determines the official rated speed of a CPU from its brand
676	// string. This insanity is *actually the official documented way to do
677	// this according to Intel*, prior to leaf 0x15 existing. The official
678	// documentation only shows this working for exactly `x.xx` or `xxxx`
679	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
680	// sizes.
681	hz := strings.LastIndex(model, "Hz")
682	if hz < 3 {
683		return -1
684	}
685	var multiplier int64
686	switch model[hz-1] {
687	case 'M':
688		multiplier = 1000 * 1000
689	case 'G':
690		multiplier = 1000 * 1000 * 1000
691	case 'T':
692		multiplier = 1000 * 1000 * 1000 * 1000
693	}
694	if multiplier == 0 {
695		return -1
696	}
697	freq := int64(0)
698	divisor := int64(0)
699	decimalShift := int64(1)
700	var i int
701	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
702		if model[i] >= '0' && model[i] <= '9' {
703			freq += int64(model[i]-'0') * decimalShift
704			decimalShift *= 10
705		} else if model[i] == '.' {
706			if divisor != 0 {
707				return -1
708			}
709			divisor = decimalShift
710		} else {
711			return -1
712		}
713	}
714	// we didn't find a space
715	if i < 0 {
716		return -1
717	}
718	if divisor != 0 {
719		return (freq * multiplier) / divisor
720	}
721	return freq * multiplier
722}
723
724// VM Will return true if the cpu id indicates we are in
725// a virtual machine. This is only a hint, and will very likely
726// have many false negatives.
727func (c CPUInfo) VM() bool {
728	switch c.VendorID {
729	case MSVM, KVM, VMware, XenHVM, Bhyve:
730		return true
731	}
732	return false
733}
734
// Flags contains detected cpu features and characteristics.
// It is a 64-bit set in which each bit corresponds to one of the x86 feature
// constants (CMOV, NX, ...); see Strings for the printable form.
type Flags uint64
737
// ArmFlags contains detected ARM cpu features and characteristics.
// It is a 64-bit set in which each bit corresponds to one of the arm64
// feature constants (FP, ASIMD, ...); see Strings for the printable form.
type ArmFlags uint64
740
741// String returns a string representation of the detected
742// CPU features.
743func (f Flags) String() string {
744	return strings.Join(f.Strings(), ",")
745}
746
747// Strings returns an array of the detected features.
748func (f Flags) Strings() []string {
749	r := make([]string, 0, 20)
750	for i := uint(0); i < 64; i++ {
751		key := Flags(1 << i)
752		val := flagNames[key]
753		if f&key != 0 {
754			r = append(r, val)
755		}
756	}
757	return r
758}
759
760// String returns a string representation of the detected
761// CPU features.
762func (f ArmFlags) String() string {
763	return strings.Join(f.Strings(), ",")
764}
765
766// Strings returns an array of the detected features.
767func (f ArmFlags) Strings() []string {
768	r := make([]string, 0, 20)
769	for i := uint(0); i < 64; i++ {
770		key := ArmFlags(1 << i)
771		val := flagNamesArm[key]
772		if f&key != 0 {
773			r = append(r, val)
774		}
775	}
776	return r
777}
778func maxExtendedFunction() uint32 {
779	eax, _, _, _ := cpuid(0x80000000)
780	return eax
781}
782
783func maxFunctionID() uint32 {
784	a, _, _, _ := cpuid(0)
785	return a
786}
787
788func brandName() string {
789	if maxExtendedFunction() >= 0x80000004 {
790		v := make([]uint32, 0, 48)
791		for i := uint32(0); i < 3; i++ {
792			a, b, c, d := cpuid(0x80000002 + i)
793			v = append(v, a, b, c, d)
794		}
795		return strings.Trim(string(valAsString(v...)), " ")
796	}
797	return "unknown"
798}
799
800func threadsPerCore() int {
801	mfi := maxFunctionID()
802	vend, _ := vendorID()
803
804	if mfi < 0x4 || (vend != Intel && vend != AMD) {
805		return 1
806	}
807
808	if mfi < 0xb {
809		if vend != Intel {
810			return 1
811		}
812		_, b, _, d := cpuid(1)
813		if (d & (1 << 28)) != 0 {
814			// v will contain logical core count
815			v := (b >> 16) & 255
816			if v > 1 {
817				a4, _, _, _ := cpuid(4)
818				// physical cores
819				v2 := (a4 >> 26) + 1
820				if v2 > 0 {
821					return int(v) / int(v2)
822				}
823			}
824		}
825		return 1
826	}
827	_, b, _, _ := cpuidex(0xb, 0)
828	if b&0xffff == 0 {
829		return 1
830	}
831	return int(b & 0xffff)
832}
833
834func logicalCores() int {
835	mfi := maxFunctionID()
836	v, _ := vendorID()
837	switch v {
838	case Intel:
839		// Use this on old Intel processors
840		if mfi < 0xb {
841			if mfi < 1 {
842				return 0
843			}
844			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
845			// that can be assigned to logical processors in a physical package.
846			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
847			_, ebx, _, _ := cpuid(1)
848			logical := (ebx >> 16) & 0xff
849			return int(logical)
850		}
851		_, b, _, _ := cpuidex(0xb, 1)
852		return int(b & 0xffff)
853	case AMD, Hygon:
854		_, b, _, _ := cpuid(1)
855		return int((b >> 16) & 0xff)
856	default:
857		return 0
858	}
859}
860
861func familyModel() (int, int) {
862	if maxFunctionID() < 0x1 {
863		return 0, 0
864	}
865	eax, _, _, _ := cpuid(1)
866	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
867	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
868	return int(family), int(model)
869}
870
871func physicalCores() int {
872	v, _ := vendorID()
873	switch v {
874	case Intel:
875		return logicalCores() / threadsPerCore()
876	case AMD, Hygon:
877		lc := logicalCores()
878		tpc := threadsPerCore()
879		if lc > 0 && tpc > 0 {
880			return lc / tpc
881		}
882		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
883
884		if maxExtendedFunction() >= 0x80000008 {
885			_, _, c, _ := cpuid(0x80000008)
886			return int(c&0xff) + 1
887		}
888	}
889	return 0
890}
891
// vendorMapping translates the 12-character vendor string read by vendorID
// into a comparable Vendor value. Several strings may map to the same vendor.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
	"Vortex86 SoC": SiS,
	"SiS SiS SiS ": SiS,
	"RiseRiseRise": SiS,
	"Genuine  RDC": RDC,
}
913
914func vendorID() (Vendor, string) {
915	_, b, c, d := cpuid(0)
916	v := string(valAsString(b, d, c))
917	vend, ok := vendorMapping[v]
918	if !ok {
919		return Other, v
920	}
921	return vend, v
922}
923
924func cacheLine() int {
925	if maxFunctionID() < 0x1 {
926		return 0
927	}
928
929	_, ebx, _, _ := cpuid(1)
930	cache := (ebx & 0xff00) >> 5 // cflush size
931	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
932		_, _, ecx, _ := cpuid(0x80000006)
933		cache = ecx & 0xff // cacheline size
934	}
935	// TODO: Read from Cache and TLB Information
936	return int(cache)
937}
938
939func (c *CPUInfo) cacheSize() {
940	c.Cache.L1D = -1
941	c.Cache.L1I = -1
942	c.Cache.L2 = -1
943	c.Cache.L3 = -1
944	vendor, _ := vendorID()
945	switch vendor {
946	case Intel:
947		if maxFunctionID() < 4 {
948			return
949		}
950		for i := uint32(0); ; i++ {
951			eax, ebx, ecx, _ := cpuidex(4, i)
952			cacheType := eax & 15
953			if cacheType == 0 {
954				break
955			}
956			cacheLevel := (eax >> 5) & 7
957			coherency := int(ebx&0xfff) + 1
958			partitions := int((ebx>>12)&0x3ff) + 1
959			associativity := int((ebx>>22)&0x3ff) + 1
960			sets := int(ecx) + 1
961			size := associativity * partitions * coherency * sets
962			switch cacheLevel {
963			case 1:
964				if cacheType == 1 {
965					// 1 = Data Cache
966					c.Cache.L1D = size
967				} else if cacheType == 2 {
968					// 2 = Instruction Cache
969					c.Cache.L1I = size
970				} else {
971					if c.Cache.L1D < 0 {
972						c.Cache.L1I = size
973					}
974					if c.Cache.L1I < 0 {
975						c.Cache.L1I = size
976					}
977				}
978			case 2:
979				c.Cache.L2 = size
980			case 3:
981				c.Cache.L3 = size
982			}
983		}
984	case AMD, Hygon:
985		// Untested.
986		if maxExtendedFunction() < 0x80000005 {
987			return
988		}
989		_, _, ecx, edx := cpuid(0x80000005)
990		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
991		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
992
993		if maxExtendedFunction() < 0x80000006 {
994			return
995		}
996		_, _, ecx, _ = cpuid(0x80000006)
997		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
998
999		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
1000		if maxExtendedFunction() < 0x8000001D {
1001			return
1002		}
1003		for i := uint32(0); i < math.MaxUint32; i++ {
1004			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
1005
1006			level := (eax >> 5) & 7
1007			cacheNumSets := ecx + 1
1008			cacheLineSize := 1 + (ebx & 2047)
1009			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
1010			cacheNumWays := 1 + ((ebx >> 22) & 511)
1011
1012			typ := eax & 15
1013			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
1014			if typ == 0 {
1015				return
1016			}
1017
1018			switch level {
1019			case 1:
1020				switch typ {
1021				case 1:
1022					// Data cache
1023					c.Cache.L1D = size
1024				case 2:
1025					// Inst cache
1026					c.Cache.L1I = size
1027				default:
1028					if c.Cache.L1D < 0 {
1029						c.Cache.L1I = size
1030					}
1031					if c.Cache.L1I < 0 {
1032						c.Cache.L1I = size
1033					}
1034				}
1035			case 2:
1036				c.Cache.L2 = size
1037			case 3:
1038				c.Cache.L3 = size
1039			}
1040		}
1041	}
1042
1043	return
1044}
1045
// SGXEPCSection describes one EPC section as reported by a sub-leaf of CPUID
// function 0x12 (see hasSGX).
//
// BaseAddress is assembled from EAX[31:12] (low part) and EBX[19:0] (bits 32
// and up); EPCSize is assembled the same way from ECX and EDX.
type SGXEPCSection struct {
	BaseAddress uint64
	EPCSize     uint64
}
1050
// SGXSupport holds the SGX information gathered by hasSGX.
//
// Available and LaunchControl are copied from the arguments passed to hasSGX.
// When Available is false all other fields keep their zero values.
//
// The remaining fields come from CPUID function 0x12:
//   - SGX1Supported and SGX2Supported are bits 0 and 1 of EAX in sub-leaf 0.
//   - MaxEnclaveSizeNot64 is 2 raised to EDX[7:0] of sub-leaf 0.
//   - MaxEnclaveSize64 is 2 raised to EDX[15:8] of sub-leaf 0.
//   - EPCSections has one entry for every sub-leaf, starting at 2, whose
//     type field (EAX[3:0]) is 1.
type SGXSupport struct {
	Available           bool
	LaunchControl       bool
	SGX1Supported       bool
	SGX2Supported       bool
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	EPCSections         []SGXEPCSection
}
1060
1061func hasSGX(available, lc bool) (rval SGXSupport) {
1062	rval.Available = available
1063
1064	if !available {
1065		return
1066	}
1067
1068	rval.LaunchControl = lc
1069
1070	a, _, _, d := cpuidex(0x12, 0)
1071	rval.SGX1Supported = a&0x01 != 0
1072	rval.SGX2Supported = a&0x02 != 0
1073	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
1074	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
1075	rval.EPCSections = make([]SGXEPCSection, 0)
1076
1077	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
1078		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
1079		leafType := eax & 0xf
1080
1081		if leafType == 0 {
1082			// Invalid subleaf, stop iterating
1083			break
1084		} else if leafType == 1 {
1085			// EPC Section subleaf
1086			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
1087			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
1088
1089			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
1090			rval.EPCSections = append(rval.EPCSections, section)
1091		}
1092	}
1093
1094	return
1095}
1096
1097func support() Flags {
1098	mfi := maxFunctionID()
1099	vend, _ := vendorID()
1100	if mfi < 0x1 {
1101		return 0
1102	}
1103	rval := uint64(0)
1104	_, _, c, d := cpuid(1)
1105	if (d & (1 << 15)) != 0 {
1106		rval |= CMOV
1107	}
1108	if (d & (1 << 23)) != 0 {
1109		rval |= MMX
1110	}
1111	if (d & (1 << 25)) != 0 {
1112		rval |= MMXEXT
1113	}
1114	if (d & (1 << 25)) != 0 {
1115		rval |= SSE
1116	}
1117	if (d & (1 << 26)) != 0 {
1118		rval |= SSE2
1119	}
1120	if (c & 1) != 0 {
1121		rval |= SSE3
1122	}
1123	if (c & (1 << 5)) != 0 {
1124		rval |= VMX
1125	}
1126	if (c & 0x00000200) != 0 {
1127		rval |= SSSE3
1128	}
1129	if (c & 0x00080000) != 0 {
1130		rval |= SSE4
1131	}
1132	if (c & 0x00100000) != 0 {
1133		rval |= SSE42
1134	}
1135	if (c & (1 << 25)) != 0 {
1136		rval |= AESNI
1137	}
1138	if (c & (1 << 1)) != 0 {
1139		rval |= CLMUL
1140	}
1141	if c&(1<<23) != 0 {
1142		rval |= POPCNT
1143	}
1144	if c&(1<<30) != 0 {
1145		rval |= RDRAND
1146	}
1147	if c&(1<<29) != 0 {
1148		rval |= F16C
1149	}
1150	if c&(1<<13) != 0 {
1151		rval |= CX16
1152	}
1153	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1154		if threadsPerCore() > 1 {
1155			rval |= HTT
1156		}
1157	}
1158	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1159		if threadsPerCore() > 1 {
1160			rval |= HTT
1161		}
1162	}
1163	// Check XGETBV, OXSAVE and AVX bits
1164	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
1165		// Check for OS support
1166		eax, _ := xgetbv(0)
1167		if (eax & 0x6) == 0x6 {
1168			rval |= AVX
1169			if (c & 0x00001000) != 0 {
1170				rval |= FMA3
1171			}
1172		}
1173	}
1174
1175	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1176	if mfi >= 7 {
1177		_, ebx, ecx, edx := cpuidex(7, 0)
1178		eax1, _, _, _ := cpuidex(7, 1)
1179		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
1180			rval |= AVX2
1181		}
1182		if (ebx & 0x00000008) != 0 {
1183			rval |= BMI1
1184			if (ebx & 0x00000100) != 0 {
1185				rval |= BMI2
1186			}
1187		}
1188		if ebx&(1<<2) != 0 {
1189			rval |= SGX
1190		}
1191		if ebx&(1<<4) != 0 {
1192			rval |= HLE
1193		}
1194		if ebx&(1<<9) != 0 {
1195			rval |= ERMS
1196		}
1197		if ebx&(1<<11) != 0 {
1198			rval |= RTM
1199		}
1200		if ebx&(1<<14) != 0 {
1201			rval |= MPX
1202		}
1203		if ebx&(1<<18) != 0 {
1204			rval |= RDSEED
1205		}
1206		if ebx&(1<<19) != 0 {
1207			rval |= ADX
1208		}
1209		if ebx&(1<<29) != 0 {
1210			rval |= SHA
1211		}
1212		if edx&(1<<26) != 0 {
1213			rval |= IBPB
1214		}
1215		if ecx&(1<<30) != 0 {
1216			rval |= SGXLC
1217		}
1218		if edx&(1<<27) != 0 {
1219			rval |= STIBP
1220		}
1221
1222		// Only detect AVX-512 features if XGETBV is supported
1223		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1224			// Check for OS support
1225			eax, _ := xgetbv(0)
1226
1227			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1228			// ZMM16-ZMM31 state are enabled by OS)
1229			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1230			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
1231				if ebx&(1<<16) != 0 {
1232					rval |= AVX512F
1233				}
1234				if ebx&(1<<17) != 0 {
1235					rval |= AVX512DQ
1236				}
1237				if ebx&(1<<21) != 0 {
1238					rval |= AVX512IFMA
1239				}
1240				if ebx&(1<<26) != 0 {
1241					rval |= AVX512PF
1242				}
1243				if ebx&(1<<27) != 0 {
1244					rval |= AVX512ER
1245				}
1246				if ebx&(1<<28) != 0 {
1247					rval |= AVX512CD
1248				}
1249				if ebx&(1<<30) != 0 {
1250					rval |= AVX512BW
1251				}
1252				if ebx&(1<<31) != 0 {
1253					rval |= AVX512VL
1254				}
1255				// ecx
1256				if ecx&(1<<1) != 0 {
1257					rval |= AVX512VBMI
1258				}
1259				if ecx&(1<<6) != 0 {
1260					rval |= AVX512VBMI2
1261				}
1262				if ecx&(1<<8) != 0 {
1263					rval |= GFNI
1264				}
1265				if ecx&(1<<9) != 0 {
1266					rval |= VAES
1267				}
1268				if ecx&(1<<10) != 0 {
1269					rval |= VPCLMULQDQ
1270				}
1271				if ecx&(1<<11) != 0 {
1272					rval |= AVX512VNNI
1273				}
1274				if ecx&(1<<12) != 0 {
1275					rval |= AVX512BITALG
1276				}
1277				if ecx&(1<<14) != 0 {
1278					rval |= AVX512VPOPCNTDQ
1279				}
1280				// edx
1281				if edx&(1<<8) != 0 {
1282					rval |= AVX512VP2INTERSECT
1283				}
1284				// cpuid eax 07h,ecx=1
1285				if eax1&(1<<5) != 0 {
1286					rval |= AVX512BF16
1287				}
1288			}
1289		}
1290	}
1291
1292	if maxExtendedFunction() >= 0x80000001 {
1293		_, _, c, d := cpuid(0x80000001)
1294		if (c & (1 << 5)) != 0 {
1295			rval |= LZCNT
1296			rval |= POPCNT
1297		}
1298		if (d & (1 << 31)) != 0 {
1299			rval |= AMD3DNOW
1300		}
1301		if (d & (1 << 30)) != 0 {
1302			rval |= AMD3DNOWEXT
1303		}
1304		if (d & (1 << 23)) != 0 {
1305			rval |= MMX
1306		}
1307		if (d & (1 << 22)) != 0 {
1308			rval |= MMXEXT
1309		}
1310		if (c & (1 << 6)) != 0 {
1311			rval |= SSE4A
1312		}
1313		if d&(1<<20) != 0 {
1314			rval |= NX
1315		}
1316		if d&(1<<27) != 0 {
1317			rval |= RDTSCP
1318		}
1319
1320		/* Allow for selectively disabling SSE2 functions on AMD processors
1321		   with SSE2 support but not SSE4a. This includes Athlon64, some
1322		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
1323		   than SSE2 often enough to utilize this special-case flag.
1324		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
1325		   so that SSE2 is used unless explicitly disabled by checking
1326		   AV_CPU_FLAG_SSE2SLOW. */
1327		if vend != Intel &&
1328			rval&SSE2 != 0 && (c&0x00000040) == 0 {
1329			rval |= SSE2SLOW
1330		}
1331
1332		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1333		 * used unless the OS has AVX support. */
1334		if (rval & AVX) != 0 {
1335			if (c & 0x00000800) != 0 {
1336				rval |= XOP
1337			}
1338			if (c & 0x00010000) != 0 {
1339				rval |= FMA4
1340			}
1341		}
1342
1343		if vend == Intel {
1344			family, model := familyModel()
1345			if family == 6 && (model == 9 || model == 13 || model == 14) {
1346				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
1347				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
1348				 * usually slower than mmx. */
1349				if (rval & SSE2) != 0 {
1350					rval |= SSE2SLOW
1351				}
1352				if (rval & SSE3) != 0 {
1353					rval |= SSE3SLOW
1354				}
1355			}
1356			/* The Atom processor has SSSE3 support, which is useful in many cases,
1357			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
1358			 * on the Atom, but is generally faster on other processors supporting
1359			 * SSSE3. This flag allows for selectively disabling certain SSSE3
1360			 * functions on the Atom. */
1361			if family == 6 && model == 28 {
1362				rval |= ATOM
1363			}
1364		}
1365	}
1366	return Flags(rval)
1367}
1368
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and concatenates them in argument order. The result is cut off
// immediately before the first zero byte, if there is one.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			ch := byte(word >> shift)
			if ch == 0 {
				// NUL terminates the string.
				return out
			}
			out = append(out, ch)
		}
	}
	return out
}
1390
// ArmFP reports whether the FP flag is set in c.Arm:
// single-precision and double-precision floating point.
func (c CPUInfo) ArmFP() bool {
	return c.Arm&FP != 0
}

// ArmASIMD reports whether the ASIMD flag is set in c.Arm:
// Advanced SIMD.
func (c CPUInfo) ArmASIMD() bool {
	return c.Arm&ASIMD != 0
}

// ArmEVTSTRM reports whether the EVTSTRM flag is set in c.Arm:
// generic timer.
func (c CPUInfo) ArmEVTSTRM() bool {
	return c.Arm&EVTSTRM != 0
}

// ArmAES reports whether the AES flag is set in c.Arm:
// AES instructions.
func (c CPUInfo) ArmAES() bool {
	return c.Arm&AES != 0
}

// ArmPMULL reports whether the PMULL flag is set in c.Arm:
// polynomial multiply instructions (PMULL/PMULL2).
func (c CPUInfo) ArmPMULL() bool {
	return c.Arm&PMULL != 0
}

// ArmSHA1 reports whether the SHA1 flag is set in c.Arm:
// SHA-1 instructions (SHA1C, etc).
func (c CPUInfo) ArmSHA1() bool {
	return c.Arm&SHA1 != 0
}

// ArmSHA2 reports whether the SHA2 flag is set in c.Arm:
// SHA-2 instructions (SHA256H, etc).
func (c CPUInfo) ArmSHA2() bool {
	return c.Arm&SHA2 != 0
}

// ArmCRC32 reports whether the CRC32 flag is set in c.Arm:
// CRC32/CRC32C instructions.
func (c CPUInfo) ArmCRC32() bool {
	return c.Arm&CRC32 != 0
}

// ArmATOMICS reports whether the ATOMICS flag is set in c.Arm:
// Large System Extensions (LSE).
func (c CPUInfo) ArmATOMICS() bool {
	return c.Arm&ATOMICS != 0
}

// ArmFPHP reports whether the FPHP flag is set in c.Arm:
// half-precision floating point.
func (c CPUInfo) ArmFPHP() bool {
	return c.Arm&FPHP != 0
}

// ArmASIMDHP reports whether the ASIMDHP flag is set in c.Arm:
// Advanced SIMD half-precision floating point.
func (c CPUInfo) ArmASIMDHP() bool {
	return c.Arm&ASIMDHP != 0
}

// ArmASIMDRDM reports whether the ASIMDRDM flag is set in c.Arm:
// rounding double multiply accumulate/subtract (SQRDMLAH/SQRDMLSH).
func (c CPUInfo) ArmASIMDRDM() bool {
	return c.Arm&ASIMDRDM != 0
}

// ArmJSCVT reports whether the JSCVT flag is set in c.Arm:
// Javascript-style double->int convert (FJCVTZS).
func (c CPUInfo) ArmJSCVT() bool {
	return c.Arm&JSCVT != 0
}

// ArmFCMA reports whether the FCMA flag is set in c.Arm:
// floating point complex number addition and multiplication.
func (c CPUInfo) ArmFCMA() bool {
	return c.Arm&FCMA != 0
}

// ArmLRCPC reports whether the LRCPC flag is set in c.Arm:
// weaker release consistency (LDAPR, etc).
func (c CPUInfo) ArmLRCPC() bool {
	return c.Arm&LRCPC != 0
}

// ArmDCPOP reports whether the DCPOP flag is set in c.Arm:
// data cache clean to Point of Persistence (DC CVAP).
func (c CPUInfo) ArmDCPOP() bool {
	return c.Arm&DCPOP != 0
}

// ArmSHA3 reports whether the SHA3 flag is set in c.Arm:
// SHA-3 instructions (EOR3, RAXI, XAR, BCAX).
func (c CPUInfo) ArmSHA3() bool {
	return c.Arm&SHA3 != 0
}

// ArmSM3 reports whether the SM3 flag is set in c.Arm:
// SM3 instructions.
func (c CPUInfo) ArmSM3() bool {
	return c.Arm&SM3 != 0
}

// ArmSM4 reports whether the SM4 flag is set in c.Arm:
// SM4 instructions.
func (c CPUInfo) ArmSM4() bool {
	return c.Arm&SM4 != 0
}

// ArmASIMDDP reports whether the ASIMDDP flag is set in c.Arm:
// SIMD dot product.
func (c CPUInfo) ArmASIMDDP() bool {
	return c.Arm&ASIMDDP != 0
}

// ArmSHA512 reports whether the SHA512 flag is set in c.Arm:
// SHA512 instructions.
func (c CPUInfo) ArmSHA512() bool {
	return c.Arm&SHA512 != 0
}

// ArmSVE reports whether the SVE flag is set in c.Arm:
// Scalable Vector Extension.
func (c CPUInfo) ArmSVE() bool {
	return c.Arm&SVE != 0
}

// ArmGPA reports whether the GPA flag is set in c.Arm:
// generic pointer authentication.
func (c CPUInfo) ArmGPA() bool {
	return c.Arm&GPA != 0
}
1505