1 /**
2  * Identify the characteristics of the host CPU, providing information
3  * about cache sizes and assembly optimisation hints. This module is
4  * provided primarily for assembly language programmers.
5  *
6  * References:
7  * Some of this information was extremely difficult to track down. Some of the
8  * documents below were found only in cached versions stored by search engines!
9  * This code relies on information found in:
10  *
11  * $(UL
12  * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13  *    Volume 2A: Instruction Set Reference, A-M" (2007).
14  * )
15  * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16  * )
17  * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18  *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19  * )
20  * $(LI "AMD Geode(TM) GX Processors Data Book",
21  *    Advanced Micro Devices, Publication ID 31505E, (2005).
22  * )
23  * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24  * )
25  * $(LI "Application note 106: Software Customization for the 6x86 Family",
26  *    Cyrix Corporation, Rev 1.5 (1998)
27  * )
28  * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29  * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30  *   National Semiconductor, (2002)
31  * )
32  * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33  * )
34  * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35  * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36  * $(LI "What every programmer should know about memory",
 *    Ulrich Drepper, Red Hat, Inc., (2007).
38  * )
39  * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40  *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41  * )
42  * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43  *    Note 485" (2009).
44  * )
45  * )
46  *
47  * Bugs: Currently only works on x86 and Itanium CPUs.
48  *      Many processors have bugs in their microcode for the CPUID instruction,
49  *      so sometimes the cache information may be incorrect.
50  *
51  * Copyright: Copyright Don Clugston 2007 - 2009.
52  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53  * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54  * Source:    $(DRUNTIMESRC core/_cpuid.d)
55  */
56 
57 module core.cpuid;
58 
59 version (GNU) version = GNU_OR_LDC;
60 version (LDC) version = GNU_OR_LDC;
61 
62 @trusted:
63 nothrow:
64 @nogc:
65 
66 // If optimizing for a particular processor, it is generally better
67 // to identify based on features rather than model. NOTE: Normally
68 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
69 // with a backup for other CPUs.
70 // Pentium    -- preferPentium1()
71 // PMMX       --   + mmx()
72 // PPro       -- default
73 // PII        --   + mmx()
74 // PIII       --   + mmx() + sse()
75 // PentiumM   --   + mmx() + sse() + sse2()
76 // Pentium4   -- preferPentium4()
77 // PentiumD   --   + isX86_64()
78 // Core2      -- default + isX86_64()
79 // AMD K5     -- preferPentium1()
80 // AMD K6     --   + mmx()
81 // AMD K6-II  --   + mmx() + 3dnow()
82 // AMD K7     -- preferAthlon()
83 // AMD K8     --   + sse2()
84 // AMD K10    --   + isX86_64()
85 // Cyrix 6x86 -- preferPentium1()
86 //    6x86MX  --   + mmx()
87 
88 // GDC support uses extended inline assembly:
89 //   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
90 //   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
91 //   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
92 
93 public:
94 
/// Cache size and behaviour for a single cache level.
struct CacheInfo
{
    /// Size of the cache, in kilobytes, per CPU.
    /// For L1 unified (data + code) caches, this size is half the physical size.
    /// (we don't halve it for larger sizes, since normally
    /// data size is much greater than code size for critical loops).
    size_t size;
    /// Number of ways of associativity, eg:
    /// $(UL
    /// $(LI 1 = direct mapped)
    /// $(LI 2 = 2-way set associative)
    /// $(LI 3 = 3-way set associative)
    /// $(LI ubyte.max = fully associative)
    /// )
    ubyte associativity;
    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;
}
114 
public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    // The cache-detection routines in this module write their results here;
    // element 0 describes the level 1 cache, element 1 the level 2 cache, etc.
    __gshared CacheInfo[5] datacache;
119 
@property pure
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels are set to size_t.max (== entire memory space)
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor()     {return _vendor;}
    /// Returns processor string, for display purposes only
    string processor()  {return _processor;}

    /// Does it have an x87 FPU on-chip?
    bool x87onChip()    {return _x87onChip;}
    /// Is MMX supported?
    bool mmx()          {return _mmx;}
    /// Is SSE supported?
    bool sse()          {return _sse;}
    /// Is SSE2 supported?
    bool sse2()         {return _sse2;}
    /// Is SSE3 supported?
    bool sse3()         {return _sse3;}
    /// Is SSSE3 supported?
    bool ssse3()         {return _ssse3;}
    /// Is SSE4.1 supported?
    bool sse41()        {return _sse41;}
    /// Is SSE4.2 supported?
    bool sse42()        {return _sse42;}
    /// Is SSE4a supported?
    bool sse4a()        {return _sse4a;}
    /// Is AES supported?
    bool aes()          {return _aes;}
    /// Is pclmulqdq supported?
    bool hasPclmulqdq() {return _hasPclmulqdq;}
    /// Is rdrand supported?
    bool hasRdrand()    {return _hasRdrand;}
    /// Is AVX supported?
    bool avx()          {return _avx;}
    /// Is VEX-Encoded AES supported?
    bool vaes()         {return _vaes;}
    /// Is vpclmulqdq supported?
    bool hasVpclmulqdq(){return _hasVpclmulqdq; }
    /// Is FMA supported?
    bool fma()          {return _fma;}
    /// Is FP16C supported?
    bool fp16c()        {return _fp16c;}
    /// Is AVX2 supported?
    bool avx2()         {return _avx2;}
    /// Is HLE (hardware lock elision) supported?
    bool hle()          {return _hle;}
    /// Is RTM (restricted transactional memory) supported?
    bool rtm()          {return _rtm;}
    /// Is rdseed supported?
    bool hasRdseed()    {return _hasRdseed;}
    /// Is SHA supported?
    bool hasSha()       {return _hasSha;}
    /// Is AMD 3DNOW supported?
    bool amd3dnow()     {return _amd3dnow;}
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt()  {return _amd3dnowExt;}
    /// Are AMD extensions to MMX supported?
    bool amdMmx()       {return _amdMmx;}
    /// Is fxsave/fxrstor supported?
    bool hasFxsr()          {return _hasFxsr;}
    /// Is cmov supported?
    bool hasCmov()          {return _hasCmov;}
    /// Is rdtsc supported?
    bool hasRdtsc()         {return _hasRdtsc;}
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b()     {return _hasCmpxchg8b;}
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b()    {return _hasCmpxchg16b;}
    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch()   {return _has3dnowPrefetch;}
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf()        {return _hasLahfSahf;}
    /// Is POPCNT supported?
    bool hasPopcnt()        {return _hasPopcnt;}
    /// Is LZCNT supported?
    bool hasLzcnt()         {return _hasLzcnt;}
    /// Is this an Intel64 or AMD 64?
    bool isX86_64()         {return _isX86_64;}

    /// Is this an IA64 (Itanium) processor?
    bool isItanium()        { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading()   { return _hyperThreading; }
    /// Returns number of threads per CPU
    uint threadsPerCPU()    {return _threadsPerCPU;}
    /// Returns number of cores in CPU
    uint coresPerCPU()      {return _coresPerCPU;}

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    /// $(LI AMD Athlon (K7, K8, K10). )
    /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
    /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }
    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }
    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}
252 
private immutable
{
    /* These exist as immutables so that the query property functions can
     * be backwards compatible with code that called them with ().
     * Also, immutables can only be set by the static this().
     *
     * Each field backs the public property of the same name without the
     * leading underscore.
     */
    const(CacheInfo)[5] _dataCaches;
    string _vendor;
    string _processor;
    bool _x87onChip;
    bool _mmx;
    bool _sse;
    bool _sse2;
    bool _sse3;
    bool _ssse3;
    bool _sse41;
    bool _sse42;
    bool _sse4a;
    bool _aes;
    bool _hasPclmulqdq;
    bool _hasRdrand;
    bool _avx;
    bool _vaes;
    bool _hasVpclmulqdq;
    bool _fma;
    bool _fp16c;
    bool _avx2;
    bool _hle;
    bool _rtm;
    bool _hasRdseed;
    bool _hasSha;
    bool _amd3dnow;
    bool _amd3dnowExt;
    bool _amdMmx;
    bool _hasFxsr;
    bool _hasCmov;
    bool _hasRdtsc;
    bool _hasCmpxchg8b;
    bool _hasCmpxchg16b;
    bool _hasSysEnterSysExit;
    bool _has3dnowPrefetch;
    bool _hasLahfSahf;
    bool _hasPopcnt;
    bool _hasLzcnt;
    bool _isX86_64;
    bool _isItanium;
    bool _hyperThreading;
    uint _threadsPerCPU;
    uint _coresPerCPU;
    bool _preferAthlon;
    bool _preferPentium4;
    bool _preferPentium1;
}
306 
__gshared:
    // All these values are set only once, and never subsequently modified.
public:
    /// $(RED Warning: This field will be turned into a property in a future release.)
    ///
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;
    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
    uint numCacheLevels = 1;
    /// The number of cache levels in the CPU.
    /// Currently a thin wrapper that returns $(D numCacheLevels).
    @property uint cacheLevels() { return numCacheLevels; }
319 private:
320 
// Raw detection results gathered from the CPUID queries in this module.
struct CpuFeatures
{
    bool probablyIntel; // true = _probably_ an Intel processor, might be faking
    bool probablyAMD; // true = _probably_ an AMD or Hygon processor
    string processorName;
    char [12] vendorID = 0;              // 12-character vendor string from CPUID leaf 0
    char [48] processorNameBuffer = 0;   // filled from CPUID leaves 0x8000_0002..4
    uint features = 0;     // mmx, sse, sse2, hyperthreading, etc
    uint miscfeatures = 0; // sse3, etc.
    uint extfeatures = 0;  // HLE, AVX2, RTM, etc.
    uint amdfeatures = 0;  // 3DNow!, mmxext, etc
    uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
    ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK
    uint maxCores = 1;
    uint maxThreads = 1;
}

// The single module-wide instance that the detection routines fill in.
CpuFeatures cpuFeatures;
339 
/* Hide from the optimizer where cf (a register) is coming from, so that
 * cf doesn't get "optimized away". The idea is to  reference
 * the global data through cf so not so many fixups are inserted
 * into the executable image.
 *
 * Returns: a pointer to the module-level `cpuFeatures` instance.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    // Must not be inlined, otherwise the indirection described above is lost.
    pragma(inline, false);
    return &cpuFeatures;
}
350 
351     // Note that this may indicate multi-core rather than hyperthreading.
hyperThreadingBit()352     @property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}
353 
    // feature flags CPUID1_EDX
    // (tested against CpuFeatures.features)
    enum : uint
    {
        FPU_BIT = 1,
        TIMESTAMP_BIT = 1<<4, // rdtsc
        MDSR_BIT = 1<<5,      // RDMSR/WRMSR
        CMPXCHG8B_BIT = 1<<8,
        SYSENTERSYSEXIT_BIT = 1<<11,
        CMOV_BIT = 1<<15,
        MMX_BIT = 1<<23,
        FXSR_BIT = 1<<24,
        SSE_BIT = 1<<25,
        SSE2_BIT = 1<<26,
        HTT_BIT = 1<<28,
        IA64_BIT = 1<<30
    }
    // feature flags misc CPUID1_ECX
    // (tested against CpuFeatures.miscfeatures)
    enum : uint
    {
        SSE3_BIT = 1,
        PCLMULQDQ_BIT = 1<<1, // from AVX
        MWAIT_BIT = 1<<3,
        SSSE3_BIT = 1<<9,
        FMA_BIT = 1<<12,     // from AVX
        CMPXCHG16B_BIT = 1<<13,
        SSE41_BIT = 1<<19,
        SSE42_BIT = 1<<20,
        POPCNT_BIT = 1<<23,
        AES_BIT = 1<<25, // AES instructions from AVX
        OSXSAVE_BIT = 1<<27, // Used for AVX
        AVX_BIT = 1<<28,
        FP16C_BIT = 1<<29,
        RDRAND_BIT = 1<<30,
    }
    // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
    // (tested against CpuFeatures.extfeatures)
    enum : uint
    {
        FSGSBASE_BIT = 1 << 0,
        BMI1_BIT = 1 << 3,
        HLE_BIT = 1 << 4,
        AVX2_BIT = 1 << 5,
        SMEP_BIT = 1 << 7,
        BMI2_BIT = 1 << 8,
        ERMS_BIT = 1 << 9,
        INVPCID_BIT = 1 << 10,
        RTM_BIT = 1 << 11,
        RDSEED_BIT = 1 << 18,
        SHA_BIT = 1 << 29,
    }
    // feature flags XFEATURES_ENABLED_MASK
    // (tested against CpuFeatures.xfeatures, which is read with xgetbv)
    enum : ulong
    {
        XF_FP_BIT  = 0x1,
        XF_SSE_BIT = 0x2,
        XF_YMM_BIT = 0x4,
    }
    // AMD feature flags CPUID80000001_EDX
    // (tested against CpuFeatures.amdfeatures)
    enum : uint
    {
        AMD_MMX_BIT = 1<<22,
//      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
        FFXSR_BIT = 1<<25,
        PAGE1GB_BIT = 1<<26, // support for 1GB pages
        RDTSCP_BIT = 1<<27,
        AMD64_BIT = 1<<29,
        AMD_3DNOW_EXT_BIT = 1<<30,
        AMD_3DNOW_BIT = 1<<31
    }
    // AMD misc feature flags CPUID80000001_ECX
    // (tested against CpuFeatures.amdmiscfeatures)
    enum : uint
    {
        LAHFSAHF_BIT = 1,
        LZCNT_BIT = 1<<5,
        SSE4A_BIT = 1<<6,
        AMD_3DNOW_PREFETCH_BIT = 1<<8,
    }
430 
431 
// Decide at compile time whether the CPUID-based detection code can be built.
// GDC and LDC use their own asm syntax, so they only need an x86 or x86-64
// target; other compilers need one of the D_InlineAsm_X86* inline assemblers.
version (GNU_OR_LDC) {
    version (X86)
        enum supportedX86 = true;
    else version (X86_64)
        enum supportedX86 = true;
    else
        enum supportedX86 = false;
} else version (D_InlineAsm_X86) {
    enum supportedX86 = true;
} else version (D_InlineAsm_X86_64) {
    enum supportedX86 = true;
} else {
    enum supportedX86 = false;
}
446 
447 static if (supportedX86) {
448 // Note that this code will also work for Itanium in x86 mode.
449 
// Values returned in EAX by CPUID leaf 0 and leaf 0x8000_0000 respectively;
// the detection code compares leaf numbers against them before querying a leaf.
__gshared uint max_cpuid, max_extended_cpuid;
451 
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and decodes the one-byte cache descriptors it returns,
// recording every recognised data cache in `datacache` (index 0 = L1,
// 1 = L2, 2 = L3). Descriptors that are not in the table are ignored.
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables below are parallel: ids[i] has size sizes[i] (in KB)
        // and associativity ways[i].
        static immutable ubyte [63] ids = [
            // level 1 data cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
    // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Table index of the first level 2 and the first level 3 descriptor.
        // These must track the tables above: 9 level 1 entries followed by
        // 29 level 2 entries. (The previous values 8 and 28+9 were each one
        // too small, so 0x68 was filed as L2 and 0x81 as L3.)
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                // Zero-based cache level: 0 = L1, 1 = L2, 2 = L3.
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                // Descriptor 0x49 denotes an L3 cache on family 0xF, model 6.
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // All level 3 caches (zero-based index 2) and the listed
                // descriptors use 64-byte lines; everything else uses 32.
                if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                                   || x==0x86 || x==0x87
                                   || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a+0, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
        // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
        // These are NOT standard Intel values
        // (TLB = 32 entry, 4 way associative, 4K pages)
        // (L1 cache = 16K, 4way, linesize16)
        // The size is recorded as 8, presumably because unified L1 sizes
        // are halved (see CacheInfo.size) -- TODO confirm.
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of zero would make `--numinfos` below wrap around and
            // loop about 2^32 times; treat it as a single pass instead.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            // Each register packs up to four one-byte descriptors.
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
552 
// CPUID4: "Deterministic cache parameters" leaf
//
// Enumerates the sub-leaves of CPUID leaf 4 until the cache type field is 0,
// recording each data or unified cache in `datacache` and raising
// `cpuFeatures.maxCores` if a sub-leaf reports a larger core count.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        if ((a&0x1F)==0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF)  + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches

        // ECX holds the number of sets minus one.
        ++number_of_sets;
        // Zero-based level; a reported level of 0 wraps to 255 and is
        // rejected by the bounds check below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Valid indices are 0 .. datacache.length-1, so the comparison must be
        // `>=`; with `>` a level equal to the length indexed past the array.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && (a&0x1F)==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
594 
// CPUID8000_0005 & 6
//
// Fills `datacache[0]` from extended leaf 0x8000_0005 and, when leaf
// 0x8000_0006 is available, `datacache[1]` and `datacache[2]` from it.
// If leaf 0x8000_0008 is available, its core count is used to raise
// `cpuFeatures.maxCores` and to turn the total L3 size into a per-core size.
void getAMDcacheinfo()
{
    uint dummy, c5, c6, d6;
    version (GNU_OR_LDC) asm pure nothrow @nogc {
        "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
    } else asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as a uint: with a ubyte, a raw count byte of 0xFF wrapped to 0
        // on increment and the L3 division below divided by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            uint c8;
            version (GNU_OR_LDC) asm pure nothrow @nogc {
                "cpuid" : "=a" (dummy), "=c" (c8) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c8, ECX;
            }
            // The low byte of ECX is the core count minus one.
            numcores = (c8 & 0xFF) + 1;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }

        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
        } else asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit encoded associativity field to a number of ways
        // (0xFF = fully associative, matching ubyte.max in CacheInfo).
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
649 
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Walks the sub-leaves of CPUID leaf 0x0B until both EAX and EBX come back
// zero. Sub-leaf 0 supplies the threads-per-core count and sub-leaf 1 the
// total thread count, from which `cpuFeatures.maxThreads` and
// `cpuFeatures.maxCores` are derived.
void getCpuInfo0B()
{
    int level=0;
    int threadsPerCore;
    uint a, b, c, d;
    do {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b!=0) {
           // I'm not sure about this. The docs state that there
           // are 2 hyperthreads per core if HT is factory enabled.
            if (level==0)
                threadsPerCore = b & 0xFFFF;
            else if (level==1) {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore stays 0 if sub-leaf 0 reported nothing in
                // its low 16 bits; skip the division rather than trap on a
                // divide by zero, leaving maxCores at its previous value.
                if (threadsPerCore > 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }

        }
        ++level;
    } while (a!=0 || b!=0);
}
683 
cpuidX86()684 void cpuidX86()
685 {
686     auto cf = getCpuFeatures();
687 
688     uint a, b, c, d;
689     uint* venptr = cast(uint*)cf.vendorID.ptr;
690     version (GNU_OR_LDC)
691     {
692         asm pure nothrow @nogc {
693             "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
694             "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
695         }
696     }
697     else
698     {
699         uint a2;
700         version (D_InlineAsm_X86)
701         {
702             asm pure nothrow @nogc {
703                 mov EAX, 0;
704                 cpuid;
705                 mov a, EAX;
706                 mov EAX, venptr;
707                 mov [EAX], EBX;
708                 mov [EAX + 4], EDX;
709                 mov [EAX + 8], ECX;
710             }
711         }
712         else version (D_InlineAsm_X86_64)
713         {
714             asm pure nothrow @nogc {
715                 mov EAX, 0;
716                 cpuid;
717                 mov a, EAX;
718                 mov RAX, venptr;
719                 mov [RAX], EBX;
720                 mov [RAX + 4], EDX;
721                 mov [RAX + 8], ECX;
722             }
723         }
724         asm pure nothrow @nogc {
725             mov EAX, 0x8000_0000;
726             cpuid;
727             mov a2, EAX;
728         }
729         max_cpuid = a;
730         max_extended_cpuid = a2;
731     }
732 
733 
734     cf.probablyIntel = cf.vendorID == "GenuineIntel";
735     cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
736     uint apic = 0; // brand index, apic id
737     version (GNU_OR_LDC) asm pure nothrow @nogc {
738         "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
739     } else {
740         asm pure nothrow @nogc {
741             mov EAX, 1; // model, stepping
742             cpuid;
743             mov a, EAX;
744             mov apic, EBX;
745             mov c, ECX;
746             mov d, EDX;
747         }
748         cf.features = d;
749         cf.miscfeatures = c;
750     }
751     stepping = a & 0xF;
752     immutable uint fbase = (a >> 8) & 0xF;
753     immutable uint mbase = (a >> 4) & 0xF;
754     family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
755     model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
756          mbase + ((a >> 12) & 0xF0) : mbase;
757 
758     if (max_cpuid >= 7)
759     {
760         version (GNU_OR_LDC) asm pure nothrow @nogc {
761             "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
762         } else {
763             uint ext;
764             asm pure nothrow @nogc {
765                 mov EAX, 7; // Structured extended feature leaf.
766                 mov ECX, 0; // Main leaf.
767                 cpuid;
768                 mov ext, EBX; // HLE, AVX2, RTM, etc.
769             }
770             cf.extfeatures = ext;
771         }
772     }
773 
774     if (cf.miscfeatures & OSXSAVE_BIT)
775     {
776         version (GNU_OR_LDC) asm pure nothrow @nogc {
777             /* Old assemblers do not recognize xgetbv, and there is no easy way
778              * to conditionally compile based on the assembler used, so use the
779              * raw .byte sequence instead.  */
780             ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
781         } else asm pure nothrow @nogc {
782             mov ECX, 0;
783             xgetbv;
784             mov d, EDX;
785             mov a, EAX;
786         }
787         cf.xfeatures = cast(ulong)d << 32 | a;
788     }
789 
790     cf.amdfeatures = 0;
791     cf.amdmiscfeatures = 0;
792     if (max_extended_cpuid >= 0x8000_0001) {
793         version (GNU_OR_LDC) asm pure nothrow @nogc {
794             "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
795         } else {
796             asm pure nothrow @nogc {
797                 mov EAX, 0x8000_0001;
798                 cpuid;
799                 mov c, ECX;
800                 mov d, EDX;
801             }
802             cf.amdmiscfeatures = c;
803             cf.amdfeatures = d;
804         }
805     }
806     // Try to detect fraudulent vendorIDs
807     if (amd3dnow) cf.probablyIntel = false;
808 
809     if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
810         //http://support.amd.com/TechDocs/25481.pdf pg.36
811         cf.maxCores = 1;
812         if (hyperThreadingBit) {
813             // determine max number of cores for AMD
814             version (GNU_OR_LDC) asm pure nothrow @nogc {
815                 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
816             } else asm pure nothrow @nogc {
817                 mov EAX, 0x8000_0008;
818                 cpuid;
819                 mov c, ECX;
820             }
821             cf.maxCores += c & 0xFF;
822         }
823     }
824 
825     if (max_extended_cpuid >= 0x8000_0004) {
826         uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
827         version (GNU_OR_LDC)
828         {
829             asm pure nothrow @nogc {
830                 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
831                 "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
832                 "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
833             }
834         }
835         else version (D_InlineAsm_X86)
836         {
837             asm pure nothrow @nogc {
838                 push ESI;
839                 mov ESI, pnb;
840                 mov EAX, 0x8000_0002;
841                 cpuid;
842                 mov [ESI], EAX;
843                 mov [ESI+4], EBX;
844                 mov [ESI+8], ECX;
845                 mov [ESI+12], EDX;
846                 mov EAX, 0x8000_0003;
847                 cpuid;
848                 mov [ESI+16], EAX;
849                 mov [ESI+20], EBX;
850                 mov [ESI+24], ECX;
851                 mov [ESI+28], EDX;
852                 mov EAX, 0x8000_0004;
853                 cpuid;
854                 mov [ESI+32], EAX;
855                 mov [ESI+36], EBX;
856                 mov [ESI+40], ECX;
857                 mov [ESI+44], EDX;
858                 pop ESI;
859             }
860         }
861         else version (D_InlineAsm_X86_64)
862         {
863             asm pure nothrow @nogc {
864                 push RSI;
865                 mov RSI, pnb;
866                 mov EAX, 0x8000_0002;
867                 cpuid;
868                 mov [RSI], EAX;
869                 mov [RSI+4], EBX;
870                 mov [RSI+8], ECX;
871                 mov [RSI+12], EDX;
872                 mov EAX, 0x8000_0003;
873                 cpuid;
874                 mov [RSI+16], EAX;
875                 mov [RSI+20], EBX;
876                 mov [RSI+24], ECX;
877                 mov [RSI+28], EDX;
878                 mov EAX, 0x8000_0004;
879                 cpuid;
880                 mov [RSI+32], EAX;
881                 mov [RSI+36], EBX;
882                 mov [RSI+40], ECX;
883                 mov [RSI+44], EDX;
884                 pop RSI;
885             }
886         }
887         // Intel P4 and PM pad at front with spaces.
888         // Other CPUs pad at end with nulls.
889         int start = 0, end = 0;
890         while (cf.processorNameBuffer[start] == ' ') { ++start; }
891         while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; }
892         cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
893     } else {
894         cf.processorName = "Unknown CPU";
895     }
896     // Determine cache sizes
897 
898     // Intel docs specify that they return 0 for 0x8000_0005.
899     // AMD docs do not specify the behaviour for 0004 and 0002.
900     // Centaur/VIA and most other manufacturers use the AMD method,
901     // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
902     // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
903     // for CPUID80000005. But Geode GX uses the AMD method
904 
905     // Deal with Geode GX1 - make it same as MediaGX MMX.
906     if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
907         max_extended_cpuid = 0x8000_0004;
908     }
909     // Therefore, we try the AMD method unless it's an Intel chip.
910     // If we still have no info, try the Intel methods.
911     datacache[0].size = 0;
912     if (max_cpuid<2 || !cf.probablyIntel) {
913         if (max_extended_cpuid >= 0x8000_0005) {
914             getAMDcacheinfo();
915         } else if (cf.probablyAMD) {
916             // According to AMDProcRecognitionAppNote, this means CPU
917             // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
918             // Am5x86 has 16Kb 4-way unified data & code cache.
919             datacache[0].size = 8;
920             datacache[0].associativity = 4;
921             datacache[0].lineSize = 32;
922         } else {
923             // Some obscure CPU.
924             // Values for Cyrix 6x86MX (family 6, model 0)
925             datacache[0].size = 64;
926             datacache[0].associativity = 4;
927             datacache[0].lineSize = 32;
928         }
929     }
930     if ((datacache[0].size == 0) && max_cpuid>=4) {
931         getcacheinfoCPUID4();
932     }
933     if ((datacache[0].size == 0) && max_cpuid>=2) {
934         getcacheinfoCPUID2();
935     }
936     if (datacache[0].size == 0) {
937         // Pentium, PMMX, late model 486, or an obscure CPU
938         if (mmx) { // Pentium MMX. Also has 8kB code cache.
939             datacache[0].size = 16;
940             datacache[0].associativity = 4;
941             datacache[0].lineSize = 32;
942         } else { // Pentium 1 (which also has 8kB code cache)
943                  // or 486.
944             // Cyrix 6x86: 16, 4way, 32 linesize
945             datacache[0].size = 8;
946             datacache[0].associativity = 2;
947             datacache[0].lineSize = 32;
948         }
949     }
950     if (cf.probablyIntel && max_cpuid >= 0x0B) {
951         // For Intel i7 and later, use function 0x0B to determine
952         // cores and hyperthreads.
953         getCpuInfo0B();
954     } else {
955         if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
956         else cf.maxThreads = cf.maxCores;
957 
958         if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
959             version (GNU_OR_LDC) asm pure nothrow @nogc {
960                 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
961             } else {
962                 asm pure nothrow @nogc {
963                     mov EAX, 0x8000_001e;
964                     cpuid;
965                     mov b, EBX;
966                 }
967             }
968             ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
969             cf.maxCores = cf.maxThreads / coresPerComputeUnit;
970         }
971     }
972 }
973 
974 // Return true if the cpuid instruction is supported.
975 // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
bool hasCPUID()
{
    // x86-64 targets are treated as always having CPUID; only 32-bit x86
    // performs the run-time probe below.
    version (X86_64)
        return true;
    else
    {
        // The probe tries to invert bit 21 of EFLAGS (the ID flag, mask
        // 0x0020_0000) and leaves in `flags` the set of EFLAGS bits that
        // actually changed. Both asm variants put the original EFLAGS back
        // before finishing. If neither variant is compiled in, `flags`
        // keeps its zero initialiser and the function reports false.
        uint flags;
        version (GNU_OR_LDC)
        {
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            asm nothrow @nogc { "
                pushfl                    # Save EFLAGS
                pushfl                    # Store EFLAGS
                xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
                popfl                     # Load stored EFLAGS (with ID bit inverted)
                pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
                popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
                xorl (%%esp), %%eax       # eax = whichever bits were changed
                popfl                     # Restore original EFLAGS
                " : "=a" (flags);
            }
        }
        else version (D_InlineAsm_X86)
        {
            asm nothrow @nogc {
                pushfd;
                pop EAX;                // EAX = original EFLAGS
                mov ECX, EAX;           // ECX keeps the original for the restore below
                xor EAX, 0x0020_0000;   // Invert the ID bit
                push EAX;
                popfd;                  // Load EFLAGS with the ID bit inverted
                pushfd;
                pop EAX;                // EAX = EFLAGS again (ID bit may or may not be inverted)
                xor EAX, ECX;           // EAX = whichever bits were changed
                mov flags, EAX;
                push ECX;
                popfd;                  // Restore original EFLAGS
            }
        }
        return (flags & 0x0020_0000) != 0;
    }
}
1015 
1016 } else { // supported X86
1017 
hasCPUID()1018     bool hasCPUID() { return false; }
1019 
cpuidX86()1020     void cpuidX86()
1021     {
1022             datacache[0].size = 8;
1023             datacache[0].associativity = 2;
1024             datacache[0].lineSize = 32;
1025     }
1026 }
1027 
1028 /*
1029 // TODO: Implement this function with OS support
1030 void cpuidPPC()
1031 {
1032     enum :int  { PPC601, PPC603, PPC603E, PPC604,
1033                  PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1034 
1035     // TODO:
1036     // asm { mfpvr; } returns the CPU version but unfortunately it can
1037     // only be used in kernel mode. So OS support is required.
1038     int cputype = PPC603;
1039 
1040     // 601 has a 8KB combined data & code L1 cache.
1041     uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1042     ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
1043     uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
1044     uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];
1045 
1046     datacache[0].size = sizes[cputype];
1047     datacache[0].associativity = ways[cputype];
1048     datacache[0].lineSize = (cputype==PPCG5)? 128 :
1049         (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1050     datacache[1].size = L2size[cputype];
1051     datacache[2].size = L3size[cputype];
1052     datacache[1].lineSize = datacache[0].lineSize;
1053     datacache[2].lineSize = datacache[0].lineSize;
1054 }
1055 
1056 // TODO: Implement this function with OS support
1057 void cpuidSparc()
1058 {
1059     // UltraSparcIIi : L1 = 16,  2way. L2 = 512, 4 way.
1060     // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
1061     // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
1062     // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
1063     // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
1064     // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
1065 }
1066 */
1067 
// Module constructor: runs the CPU detection once, normalises the data cache
// table, and then publishes every detected property into the module-level
// `_xxx` variables.
shared static this()
{
    auto cf = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86 (probably still with
    // an external cache); nothing is queried and the guess below applies.
    if (hasCPUID())
        cpuidX86();

    if (datacache[0].size == 0)
    {
        // Guess same as Pentium 1.
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // The first level always counts. Each further level that reported a size
    // adds one; every unused level is set equal to the full address space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level-1].lineSize;
    }

    // Set the immortals

    _dataCaches = datacache;
    _vendor     = cast(string)cf.vendorID;
    _processor  = cf.processorName;

    // Snapshot of the feature words collected during detection.
    const stdBits     = cf.features;
    const miscBits    = cf.miscfeatures;
    const extBits     = cf.extfeatures;
    const amdBits     = cf.amdfeatures;
    const amdMiscBits = cf.amdmiscfeatures;

    // Flags taken from cf.features.
    _x87onChip    = (stdBits & FPU_BIT) != 0;
    _mmx          = (stdBits & MMX_BIT) != 0;
    _sse          = (stdBits & SSE_BIT) != 0;
    _sse2         = (stdBits & SSE2_BIT) != 0;
    _hasFxsr      = (stdBits & FXSR_BIT) != 0;
    _hasCmov      = (stdBits & CMOV_BIT) != 0;
    _hasRdtsc     = (stdBits & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b = (stdBits & CMPXCHG8B_BIT) != 0;
    _isItanium    = (stdBits & IA64_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
    // (REF: www.geoffchappell.com). On those parts the feature bit is ignored.
    const sysEnterErratum = cf.probablyIntel
        && (family < 6 || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !sysEnterErratum && (stdBits & SYSENTERSYSEXIT_BIT) != 0;

    // Flags taken from cf.miscfeatures.
    _sse3          = (miscBits & SSE3_BIT) != 0;
    _ssse3         = (miscBits & SSSE3_BIT) != 0;
    _sse41         = (miscBits & SSE41_BIT) != 0;
    _sse42         = (miscBits & SSE42_BIT) != 0;
    _aes           = (miscBits & AES_BIT) != 0;
    _hasPclmulqdq  = (miscBits & PCLMULQDQ_BIT) != 0;
    _hasRdrand     = (miscBits & RDRAND_BIT) != 0;
    _hasCmpxchg16b = (miscBits & CMPXCHG16B_BIT) != 0;
    _hasPopcnt     = (miscBits & POPCNT_BIT) != 0;

    // Flags taken from cf.extfeatures.
    _hle       = (extBits & HLE_BIT) != 0;
    _rtm       = (extBits & RTM_BIT) != 0;
    _hasRdseed = (extBits & RDSEED_BIT) != 0;
    _hasSha    = (extBits & SHA_BIT) != 0;

    // Flags taken from cf.amdfeatures.
    _amd3dnow    = (amdBits & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (amdBits & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (amdBits & AMD_MMX_BIT) != 0;
    _isX86_64    = (amdBits & AMD64_BIT) != 0;

    // Flags taken from cf.amdmiscfeatures.
    _sse4a            = (amdMiscBits & SSE4A_BIT) != 0;
    _has3dnowPrefetch = (amdMiscBits & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (amdMiscBits & LAHFSAHF_BIT) != 0;
    _hasLzcnt         = (amdMiscBits & LZCNT_BIT) != 0;

    // AVX needs the CPU feature bit and both the SSE and YMM bits of
    // cf.xfeatures. Everything below is additionally gated on AVX, so these
    // assignments must stay after _aes, _hasPclmulqdq and _avx.
    enum avxStateMask = XF_SSE_BIT | XF_YMM_BIT;
    _avx = (cf.xfeatures & avxStateMask) == avxStateMask && (miscBits & AVX_BIT) != 0;

    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (miscBits & FMA_BIT) != 0;
    _fp16c         = avx && (miscBits & FP16C_BIT) != 0;
    _avx2          = avx && (extBits & AVX2_BIT) != 0;

    // Core and thread topology.
    _hyperThreading = cf.maxThreads > cf.maxCores;
    _threadsPerCPU  = cf.maxThreads;
    _coresPerCPU    = cf.maxCores;

    // Optimisation hints derived from vendor, family and model.
    _preferAthlon   = cf.probablyAMD && family >= 6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family == 6 && model < 0xF && !cf.probablyIntel);
}
1154