1 /**
2  * Identify the characteristics of the host CPU, providing information
3  * about cache sizes and assembly optimisation hints. This module is
4  * provided primarily for assembly language programmers.
5  *
6  * References:
7  * Some of this information was extremely difficult to track down. Some of the
8  * documents below were found only in cached versions stored by search engines!
9  * This code relies on information found in:
10  *
11  * $(UL
12  * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13  *    Volume 2A: Instruction Set Reference, A-M" (2007).
14  * )
15  * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16  * )
17  * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18  *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19  * )
20  * $(LI "AMD Geode(TM) GX Processors Data Book",
21  *    Advanced Micro Devices, Publication ID 31505E, (2005).
22  * )
23  * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24  * )
25  * $(LI "Application note 106: Software Customization for the 6x86 Family",
26  *    Cyrix Corporation, Rev 1.5 (1998)
27  * )
28  * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29  * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30  *   National Semiconductor, (2002)
31  * )
32  * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33  * )
34  * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35  * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36  * $(LI "What every programmer should know about memory",
 *    Ulrich Drepper, Red Hat, Inc., (2007).
38  * )
39  * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40  *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41  * )
42  * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43  *    Note 485" (2009).
44  * )
45  * )
46  *
47  * Bugs: Currently only works on x86 and Itanium CPUs.
48  *      Many processors have bugs in their microcode for the CPUID instruction,
49  *      so sometimes the cache information may be incorrect.
50  *
51  * Copyright: Copyright Don Clugston 2007 - 2009.
52  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53  * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54  * Source:    $(DRUNTIMESRC core/_cpuid.d)
55  */
56 
57 module core.cpuid;
58 
59 @trusted:
60 nothrow:
61 @nogc:
62 
63 // If optimizing for a particular processor, it is generally better
64 // to identify based on features rather than model. NOTE: Normally
65 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
66 // with a backup for other CPUs.
67 // Pentium    -- preferPentium1()
68 // PMMX       --   + mmx()
69 // PPro       -- default
70 // PII        --   + mmx()
71 // PIII       --   + mmx() + sse()
72 // PentiumM   --   + mmx() + sse() + sse2()
73 // Pentium4   -- preferPentium4()
74 // PentiumD   --   + isX86_64()
75 // Core2      -- default + isX86_64()
76 // AMD K5     -- preferPentium1()
77 // AMD K6     --   + mmx()
// AMD K6-II  --   + mmx() + amd3dnow()
79 // AMD K7     -- preferAthlon()
80 // AMD K8     --   + sse2()
81 // AMD K10    --   + isX86_64()
82 // Cyrix 6x86 -- preferPentium1()
83 //    6x86MX  --   + mmx()
84 
85 // GDC support uses extended inline assembly:
86 //   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
87 //   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
88 //   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
89 
90 public:
91 
/**
 * Describes one level of the data-cache hierarchy: how big it is,
 * how associative it is, and how long its lines are.
 */
struct CacheInfo
{
    /**
     * Capacity of the cache in kilobytes, per CPU.
     *
     * A unified (data + code) L1 cache is reported at half its physical
     * size. Larger unified caches are not halved, because in critical
     * loops the data footprint is normally much greater than the code
     * footprint.
     */
    size_t size;

    /**
     * Number of ways of associativity, eg:
     * $(UL
     * $(LI 1 = direct mapped)
     * $(LI 2 = 2-way set associative)
     * $(LI 3 = 3-way set associative)
     * $(LI ubyte.max = fully associative)
     * )
     */
    ubyte associativity;

    /// Number of bytes brought into the cache when a cache miss occurs.
    uint lineSize;
}
111 
public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    __gshared CacheInfo[5] datacache;

@property pure
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels are set to size_t.max (== entire memory space)
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor()     {return _vendor;}
    /// Returns processor string, for display purposes only
    string processor()  {return _processor;}

    /// Does it have an x87 FPU on-chip?
    bool x87onChip()    {return _x87onChip;}
    /// Is MMX supported?
    bool mmx()          {return _mmx;}
    /// Is SSE supported?
    bool sse()          {return _sse;}
    /// Is SSE2 supported?
    bool sse2()         {return _sse2;}
    /// Is SSE3 supported?
    bool sse3()         {return _sse3;}
    /// Is SSSE3 supported?
    bool ssse3()         {return _ssse3;}
    /// Is SSE4.1 supported?
    bool sse41()        {return _sse41;}
    /// Is SSE4.2 supported?
    bool sse42()        {return _sse42;}
    /// Is SSE4a supported?
    bool sse4a()        {return _sse4a;}
    /// Is AES supported?
    bool aes()          {return _aes;}
    /// Is pclmulqdq supported?
    bool hasPclmulqdq() {return _hasPclmulqdq;}
    /// Is rdrand supported?
    bool hasRdrand()    {return _hasRdrand;}
    /// Is AVX supported?
    bool avx()          {return _avx;}
    /// Is VEX-Encoded AES supported?
    bool vaes()         {return _vaes;}
    /// Is vpclmulqdq supported?
    bool hasVpclmulqdq(){return _hasVpclmulqdq; }
    /// Is FMA supported?
    bool fma()          {return _fma;}
    /// Is FP16C supported?
    bool fp16c()        {return _fp16c;}
    /// Is AVX2 supported?
    bool avx2()         {return _avx2;}
    /// Is HLE (hardware lock elision) supported?
    bool hle()          {return _hle;}
    /// Is RTM (restricted transactional memory) supported?
    bool rtm()          {return _rtm;}
    /// Is rdseed supported?
    bool hasRdseed()    {return _hasRdseed;}
    /// Is SHA supported?
    bool hasSha()       {return _hasSha;}
    /// Is AMD 3DNOW supported?
    bool amd3dnow()     {return _amd3dnow;}
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt()  {return _amd3dnowExt;}
    /// Are AMD extensions to MMX supported?
    bool amdMmx()       {return _amdMmx;}
    /// Is fxsave/fxrstor supported?
    bool hasFxsr()          {return _hasFxsr;}
    /// Is cmov supported?
    bool hasCmov()          {return _hasCmov;}
    /// Is rdtsc supported?
    bool hasRdtsc()         {return _hasRdtsc;}
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b()     {return _hasCmpxchg8b;}
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b()    {return _hasCmpxchg16b;}
    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch()   {return _has3dnowPrefetch;}
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf()        {return _hasLahfSahf;}
    /// Is POPCNT supported?
    bool hasPopcnt()        {return _hasPopcnt;}
    /// Is LZCNT supported?
    bool hasLzcnt()         {return _hasLzcnt;}
    /// Is this an Intel64 or AMD 64?
    bool isX86_64()         {return _isX86_64;}

    /// Is this an IA64 (Itanium) processor?
    bool isItanium()        { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading()   { return _hyperThreading; }
    /// Returns number of threads per CPU
    uint threadsPerCPU()    {return _threadsPerCPU;}
    /// Returns number of cores in CPU
    uint coresPerCPU()      {return _coresPerCPU;}

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    /// $(LI AMD Athlon (K7, K8, K10). )
    /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
    /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }
    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }
    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}
249 
private immutable
{
    /* Backing storage for the public query properties.
     *
     * These are immutables so that the property functions stay backwards
     * compatible with code that called them with (). Being immutable also
     * means they can only be assigned from the static this().
     */

    // Cache hierarchy and identification strings.
    const(CacheInfo)[5] _dataCaches;
    string _vendor, _processor;

    // x87 / MMX / SSE family.
    bool _x87onChip;
    bool _mmx;
    bool _sse, _sse2, _sse3, _ssse3, _sse41, _sse42, _sse4a;

    // AES, carry-less multiply, random numbers, AVX family.
    bool _aes, _hasPclmulqdq, _hasRdrand;
    bool _avx, _vaes, _hasVpclmulqdq, _fma, _fp16c, _avx2;

    // Leaf 7 features.
    bool _hle, _rtm, _hasRdseed, _hasSha;

    // AMD extensions.
    bool _amd3dnow, _amd3dnowExt, _amdMmx;

    // Individual instructions.
    bool _hasFxsr, _hasCmov, _hasRdtsc;
    bool _hasCmpxchg8b, _hasCmpxchg16b;
    bool _hasSysEnterSysExit;
    bool _has3dnowPrefetch;
    bool _hasLahfSahf, _hasPopcnt, _hasLzcnt;

    // Architecture and topology.
    bool _isX86_64, _isItanium;
    bool _hyperThreading;
    uint _threadsPerCPU, _coresPerCPU;

    // Optimisation hints.
    bool _preferAthlon, _preferPentium4, _preferPentium1;
}
303 
__gshared:
    // Everything below is written exactly once and never modified afterwards.
public:
    /// $(RED Warning: This field will be turned into a property in a future release.)
    ///
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping;
    /// ditto
    uint model;
    /// ditto
    uint family;

    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
    uint numCacheLevels = 1;

    /// The number of cache levels in the CPU.
    @property uint cacheLevels()
    {
        return numCacheLevels;
    }
private:
317 
// Raw results gathered from the CPUID / XGETBV queries, before they are
// turned into the public boolean properties.
struct CpuFeatures
{
    bool probablyIntel;         // true = _probably_ an Intel processor, might be faking
    bool probablyAMD;           // true = _probably_ an AMD processor
    string processorName;
    char[12] vendorID;
    char[48] processorNameBuffer;

    // The feature words rely on the integer default initialiser (zero).
    uint features;              // mmx, sse, sse2, hyperthreading, etc
    uint miscfeatures;          // sse3, etc.
    uint extfeatures;           // HLE, AVX2, RTM, etc.
    uint amdfeatures;           // 3DNow!, mmxext, etc
    uint amdmiscfeatures;       // sse4a, sse5, svm, etc
    ulong xfeatures;            // XFEATURES_ENABLED_MASK

    uint maxCores = 1;
    uint maxThreads = 1;
}
334 
CpuFeatures cpuFeatures;

/* Returns the address of the global cpuFeatures record.
 *
 * Inlining is disabled so the optimizer cannot see where the returned
 * pointer (a register in the caller) comes from and therefore cannot
 * "optimize it away". Callers reach the global data through that pointer,
 * so fewer fixups are inserted into the executable image.
 */
pragma(inline, false)
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    return &cpuFeatures;
}
347 
348     // Note that this may indicate multi-core rather than hyperthreading.
hyperThreadingBit()349     @property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}
350 
351     // feature flags CPUID1_EDX
352     enum : uint
353     {
354         FPU_BIT = 1,
355         TIMESTAMP_BIT = 1<<4, // rdtsc
356         MDSR_BIT = 1<<5,      // RDMSR/WRMSR
357         CMPXCHG8B_BIT = 1<<8,
358         SYSENTERSYSEXIT_BIT = 1<<11,
359         CMOV_BIT = 1<<15,
360         MMX_BIT = 1<<23,
361         FXSR_BIT = 1<<24,
362         SSE_BIT = 1<<25,
363         SSE2_BIT = 1<<26,
364         HTT_BIT = 1<<28,
365         IA64_BIT = 1<<30
366     }
367     // feature flags misc CPUID1_ECX
368     enum : uint
369     {
370         SSE3_BIT = 1,
371         PCLMULQDQ_BIT = 1<<1, // from AVX
372         MWAIT_BIT = 1<<3,
373         SSSE3_BIT = 1<<9,
374         FMA_BIT = 1<<12,     // from AVX
375         CMPXCHG16B_BIT = 1<<13,
376         SSE41_BIT = 1<<19,
377         SSE42_BIT = 1<<20,
378         POPCNT_BIT = 1<<23,
379         AES_BIT = 1<<25, // AES instructions from AVX
380         OSXSAVE_BIT = 1<<27, // Used for AVX
381         AVX_BIT = 1<<28,
382         FP16C_BIT = 1<<29,
383         RDRAND_BIT = 1<<30,
384     }
385     // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
386     enum : uint
387     {
388         FSGSBASE_BIT = 1 << 0,
389         BMI1_BIT = 1 << 3,
390         HLE_BIT = 1 << 4,
391         AVX2_BIT = 1 << 5,
392         SMEP_BIT = 1 << 7,
393         BMI2_BIT = 1 << 8,
394         ERMS_BIT = 1 << 9,
395         INVPCID_BIT = 1 << 10,
396         RTM_BIT = 1 << 11,
397         RDSEED_BIT = 1 << 18,
398         SHA_BIT = 1 << 29,
399     }
400     // feature flags XFEATURES_ENABLED_MASK
401     enum : ulong
402     {
403         XF_FP_BIT  = 0x1,
404         XF_SSE_BIT = 0x2,
405         XF_YMM_BIT = 0x4,
406     }
407     // AMD feature flags CPUID80000001_EDX
408     enum : uint
409     {
410         AMD_MMX_BIT = 1<<22,
411 //      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
412         FFXSR_BIT = 1<<25,
413         PAGE1GB_BIT = 1<<26, // support for 1GB pages
414         RDTSCP_BIT = 1<<27,
415         AMD64_BIT = 1<<29,
416         AMD_3DNOW_EXT_BIT = 1<<30,
417         AMD_3DNOW_BIT = 1<<31
418     }
419     // AMD misc feature flags CPUID80000001_ECX
420     enum : uint
421     {
422         LAHFSAHF_BIT = 1,
423         LZCNT_BIT = 1<<5,
424         SSE4A_BIT = 1<<6,
425         AMD_3DNOW_PREFETCH_BIT = 1<<8,
426     }
427 
428 
// supportedX86 is true when CPUID can be issued from this build: with GDC
// that means an x86 or x86-64 target, with other compilers it means the
// x86 or x86-64 inline assembler is available.
version (GNU)
{
    version (X86)         enum supportedX86 = true;
    else version (X86_64) enum supportedX86 = true;
    else                  enum supportedX86 = false;
}
else version (D_InlineAsm_X86)    enum supportedX86 = true;
else version (D_InlineAsm_X86_64) enum supportedX86 = true;
else                              enum supportedX86 = false;
443 
444 static if (supportedX86) {
445 // Note that this code will also work for Itanium in x86 mode.
446 
// Values returned in EAX by CPUID leaf 0 and by CPUID leaf 0x8000_0000.
__gshared uint max_cpuid;
__gshared uint max_extended_cpuid;
448 
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and decodes every descriptor byte it returns.
// Descriptors that name a data or unified cache overwrite the matching
// entry of datacache (index 0 = L1, 1 = L2, 2 = L3); all other descriptors
// are ignored.
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables below are parallel: ids[i] is described by
        // sizes[i] (in KB) and ways[i]. The first row holds the 9 L1
        // descriptors, the next three rows the 29 L2 descriptors, and the
        // remaining rows the L3 descriptors.
        static immutable ubyte [63] ids = [
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        // NOTE(review): the entry for 0x2C is 64 here, but Intel's descriptor
        // table appears to list 0x2C as a 32 KB cache -- confirm.
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
    // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Table index of the first L2 and the first L3 descriptor.
        // There are 9 L1 entries followed by 29 L2 entries, so the last L1
        // entry (0x68) is index 8 and the last L2 entry (0x81) is index 37.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                // Descriptor 0x49 is treated as L3 on family 0xF, model 6.
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // level is a 0-based index, so the L3 caches are level 2.
                if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                                   || x==0x86 || x==0x87
                                   || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
        // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
        // These are NOT standard Intel values
        // (TLB = 32 entry, 4 way associative, 4K pages)
        // (L1 cache = 16K, 4way, linesize16)
                // Presumably 8 rather than 16 because the cache is unified
                // and unified L1 sizes are reported halved -- confirm.
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of 0 would make the --numinfos below wrap around to
            // uint.max; treat it as a single iteration instead.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
549 
// CPUID4: "Deterministic cache parameters" leaf
//
// Walks the sub-leaves of CPUID leaf 4 until one reports cache type 0.
// For every data or unified cache it fills in datacache[level - 1], and it
// raises cpuFeatures.maxCores to the largest core count any sub-leaf reports.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        // Low five bits of EAX: 0 ends the list; 1 and 3 are the data and
        // unified caches kept below.
        immutable uint cachetype = a & 0x1F;
        if (cachetype==0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF)  + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cachetype!=1 && cachetype!=3) continue; // we only want data & unified caches

        ++number_of_sets;
        // A reported level of 0 wraps to 255 here and is rejected below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Valid indices are 0 .. datacache.length-1, hence >= rather than >.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
591 
// CPUID8000_0005 & 6
//
// Fills datacache[0] from ECX of CPUID 0x8000_0005. When leaf 0x8000_0006
// is available it also fills datacache[1] and datacache[2], dividing the L3
// size by the core count read from leaf 0x8000_0008 (when that leaf exists).
void getAMDcacheinfo()
{
    uint dummy, c5, c6, d6;
    version (GNU) asm pure nothrow @nogc {
        "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
    } else asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as a uint: the count is (low byte of ECX) + 1, which is 256
        // when that byte is 0xFF. In a ubyte that wraps to 0 and the L3
        // size computation below would divide by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            uint c8;
            version (GNU) asm pure nothrow @nogc {
                "cpuid" : "=a" (dummy), "=c" (c8) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c8, ECX;
            }
            numcores = (c8 & 0xFF) + 1;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }

        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
        } else asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code in bits 12..15 to a way count.
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
646 
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Queries successive sub-leaves of CPUID leaf 0x0B until one returns
// EAX == 0 and EBX == 0. Sub-leaf 0 supplies the threads-per-core count and
// sub-leaf 1 supplies cpuFeatures.maxThreads, from which
// cpuFeatures.maxCores is derived.
void getCpuInfo0B()
{
    int level=0;
    int threadsPerCore = 0;
    uint a, b, c, d;
    do {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b!=0) {
           // I'm not sure about this. The docs state that there
           // are 2 hyperthreads per core if HT is factory enabled.
            if (level==0)
                threadsPerCore = b & 0xFFFF;
            else if (level==1) {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore is still 0 if sub-leaf 0 had EBX == 0 (or
                // only high bits set); leave maxCores alone in that case
                // rather than dividing by zero.
                if (threadsPerCore != 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }

        }
        ++level;
    } while (a!=0 || b!=0);
}
680 
cpuidX86()681 void cpuidX86()
682 {
683     auto cf = getCpuFeatures();
684 
685     uint a, b, c, d;
686     uint* venptr = cast(uint*)cf.vendorID.ptr;
687     version (GNU)
688     {
689         asm pure nothrow @nogc {
690             "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
691             "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
692         }
693     }
694     else
695     {
696         uint a2;
697         version (D_InlineAsm_X86)
698         {
699             asm pure nothrow @nogc {
700                 mov EAX, 0;
701                 cpuid;
702                 mov a, EAX;
703                 mov EAX, venptr;
704                 mov [EAX], EBX;
705                 mov [EAX + 4], EDX;
706                 mov [EAX + 8], ECX;
707             }
708         }
709         else version (D_InlineAsm_X86_64)
710         {
711             asm pure nothrow @nogc {
712                 mov EAX, 0;
713                 cpuid;
714                 mov a, EAX;
715                 mov RAX, venptr;
716                 mov [RAX], EBX;
717                 mov [RAX + 4], EDX;
718                 mov [RAX + 8], ECX;
719             }
720         }
721         asm pure nothrow @nogc {
722             mov EAX, 0x8000_0000;
723             cpuid;
724             mov a2, EAX;
725         }
726         max_cpuid = a;
727         max_extended_cpuid = a2;
728     }
729 
730 
731     cf.probablyIntel = cf.vendorID == "GenuineIntel";
732     cf.probablyAMD = cf.vendorID == "AuthenticAMD";
733     uint apic = 0; // brand index, apic id
734     version (GNU) asm pure nothrow @nogc {
735         "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
736     } else {
737         asm pure nothrow @nogc {
738             mov EAX, 1; // model, stepping
739             cpuid;
740             mov a, EAX;
741             mov apic, EBX;
742             mov c, ECX;
743             mov d, EDX;
744         }
745         cf.features = d;
746         cf.miscfeatures = c;
747     }
748     stepping = a & 0xF;
749     immutable uint fbase = (a >> 8) & 0xF;
750     immutable uint mbase = (a >> 4) & 0xF;
751     family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
752     model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
753          mbase + ((a >> 12) & 0xF0) : mbase;
754 
755     if (max_cpuid >= 7)
756     {
757         version (GNU) asm pure nothrow @nogc {
758             "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
759         } else {
760             uint ext;
761             asm pure nothrow @nogc {
762                 mov EAX, 7; // Structured extended feature leaf.
763                 mov ECX, 0; // Main leaf.
764                 cpuid;
765                 mov ext, EBX; // HLE, AVX2, RTM, etc.
766             }
767             cf.extfeatures = ext;
768         }
769     }
770 
771     if (cf.miscfeatures & OSXSAVE_BIT)
772     {
773         version (GNU) asm pure nothrow @nogc {
774             "xgetbv" : "=a" (a), "=d" (d) : "c" (0);
775         } else asm pure nothrow @nogc {
776             mov ECX, 0;
777             xgetbv;
778             mov d, EDX;
779             mov a, EAX;
780         }
781         cf.xfeatures = cast(ulong)d << 32 | a;
782     }
783 
784     cf.amdfeatures = 0;
785     cf.amdmiscfeatures = 0;
786     if (max_extended_cpuid >= 0x8000_0001) {
787         version (GNU) asm pure nothrow @nogc {
788             "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
789         } else {
790             asm pure nothrow @nogc {
791                 mov EAX, 0x8000_0001;
792                 cpuid;
793                 mov c, ECX;
794                 mov d, EDX;
795             }
796             cf.amdmiscfeatures = c;
797             cf.amdfeatures = d;
798         }
799     }
800     // Try to detect fraudulent vendorIDs
801     if (amd3dnow) cf.probablyIntel = false;
802 
803     if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
804         //http://support.amd.com/TechDocs/25481.pdf pg.36
805         cf.maxCores = 1;
806         if (hyperThreadingBit) {
807             // determine max number of cores for AMD
808             version (GNU) asm pure nothrow @nogc {
809                 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
810             } else asm pure nothrow @nogc {
811                 mov EAX, 0x8000_0008;
812                 cpuid;
813                 mov c, ECX;
814             }
815             cf.maxCores += c & 0xFF;
816         }
817     }
818 
819     if (max_extended_cpuid >= 0x8000_0004) {
820         uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
821         version (GNU)
822         {
823             asm pure nothrow @nogc {
824                 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
825                 "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
826                 "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
827             }
828         }
829         else version (D_InlineAsm_X86)
830         {
831             asm pure nothrow @nogc {
832                 push ESI;
833                 mov ESI, pnb;
834                 mov EAX, 0x8000_0002;
835                 cpuid;
836                 mov [ESI], EAX;
837                 mov [ESI+4], EBX;
838                 mov [ESI+8], ECX;
839                 mov [ESI+12], EDX;
840                 mov EAX, 0x8000_0003;
841                 cpuid;
842                 mov [ESI+16], EAX;
843                 mov [ESI+20], EBX;
844                 mov [ESI+24], ECX;
845                 mov [ESI+28], EDX;
846                 mov EAX, 0x8000_0004;
847                 cpuid;
848                 mov [ESI+32], EAX;
849                 mov [ESI+36], EBX;
850                 mov [ESI+40], ECX;
851                 mov [ESI+44], EDX;
852                 pop ESI;
853             }
854         }
855         else version (D_InlineAsm_X86_64)
856         {
857             asm pure nothrow @nogc {
858                 push RSI;
859                 mov RSI, pnb;
860                 mov EAX, 0x8000_0002;
861                 cpuid;
862                 mov [RSI], EAX;
863                 mov [RSI+4], EBX;
864                 mov [RSI+8], ECX;
865                 mov [RSI+12], EDX;
866                 mov EAX, 0x8000_0003;
867                 cpuid;
868                 mov [RSI+16], EAX;
869                 mov [RSI+20], EBX;
870                 mov [RSI+24], ECX;
871                 mov [RSI+28], EDX;
872                 mov EAX, 0x8000_0004;
873                 cpuid;
874                 mov [RSI+32], EAX;
875                 mov [RSI+36], EBX;
876                 mov [RSI+40], ECX;
877                 mov [RSI+44], EDX;
878                 pop RSI;
879             }
880         }
881         // Intel P4 and PM pad at front with spaces.
882         // Other CPUs pad at end with nulls.
883         int start = 0, end = 0;
884         while (cf.processorNameBuffer[start] == ' ') { ++start; }
885         while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; }
886         cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
887     } else {
888         cf.processorName = "Unknown CPU";
889     }
890     // Determine cache sizes
891 
892     // Intel docs specify that they return 0 for 0x8000_0005.
893     // AMD docs do not specify the behaviour for 0004 and 0002.
894     // Centaur/VIA and most other manufacturers use the AMD method,
895     // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
896     // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
897     // for CPUID80000005. But Geode GX uses the AMD method
898 
899     // Deal with Geode GX1 - make it same as MediaGX MMX.
900     if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
901         max_extended_cpuid = 0x8000_0004;
902     }
903     // Therefore, we try the AMD method unless it's an Intel chip.
904     // If we still have no info, try the Intel methods.
905     datacache[0].size = 0;
906     if (max_cpuid<2 || !cf.probablyIntel) {
907         if (max_extended_cpuid >= 0x8000_0005) {
908             getAMDcacheinfo();
909         } else if (cf.probablyAMD) {
910             // According to AMDProcRecognitionAppNote, this means CPU
911             // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
912             // Am5x86 has 16Kb 4-way unified data & code cache.
913             datacache[0].size = 8;
914             datacache[0].associativity = 4;
915             datacache[0].lineSize = 32;
916         } else {
917             // Some obscure CPU.
918             // Values for Cyrix 6x86MX (family 6, model 0)
919             datacache[0].size = 64;
920             datacache[0].associativity = 4;
921             datacache[0].lineSize = 32;
922         }
923     }
924     if ((datacache[0].size == 0) && max_cpuid>=4) {
925         getcacheinfoCPUID4();
926     }
927     if ((datacache[0].size == 0) && max_cpuid>=2) {
928         getcacheinfoCPUID2();
929     }
930     if (datacache[0].size == 0) {
931         // Pentium, PMMX, late model 486, or an obscure CPU
932         if (mmx) { // Pentium MMX. Also has 8kB code cache.
933             datacache[0].size = 16;
934             datacache[0].associativity = 4;
935             datacache[0].lineSize = 32;
936         } else { // Pentium 1 (which also has 8kB code cache)
937                  // or 486.
938             // Cyrix 6x86: 16, 4way, 32 linesize
939             datacache[0].size = 8;
940             datacache[0].associativity = 2;
941             datacache[0].lineSize = 32;
942         }
943     }
944     if (cf.probablyIntel && max_cpuid >= 0x0B) {
945         // For Intel i7 and later, use function 0x0B to determine
946         // cores and hyperthreads.
947         getCpuInfo0B();
948     } else {
949         if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
950         else cf.maxThreads = cf.maxCores;
951 
952         if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
953             version (GNU) asm pure nothrow @nogc {
954                 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
955             } else {
956                 asm pure nothrow @nogc {
957                     mov EAX, 0x8000_001e;
958                     cpuid;
959                     mov b, EBX;
960                 }
961             }
962             ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
963             cf.maxCores = cf.maxThreads / coresPerComputeUnit;
964         }
965     }
966 }
967 
// Reports whether the processor implements the CPUID instruction.
//
// x86-64 builds always answer true. 32-bit x86 builds try to invert the ID
// flag (mask 0x0020_0000) in EFLAGS and report whether the flag actually
// changed.
//
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
bool hasCPUID()
{
    version (X86_64)
    {
        return true;
    }
    else
    {
        // Mask of the EFLAGS ID flag that both asm variants toggle.
        enum uint EFLAGS_ID = 0x0020_0000;

        // After the asm has run, every set bit marks an EFLAGS bit that changed.
        // Stays 0 (=> false) when neither asm variant below is compiled in.
        uint idProbe;

        version (GNU)
        {
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            // ASM template supports both AT&T and Intel syntax.
            asm nothrow @nogc { "
                pushf{l|d}                 # Save EFLAGS
                pushf{l|d}                 # Store EFLAGS
                xor{l $0x00200000, (%%esp)| dword ptr [esp], 0x00200000}
                                           # Invert the ID bit in stored EFLAGS
                popf{l|d}                  # Load stored EFLAGS (with ID bit inverted)
                pushf{l|d}                 # Store EFLAGS again (ID bit may or may not be inverted)
                pop {%%}eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
                xor {(%%esp), %%eax|eax, [esp]}
                                           # eax = whichever bits were changed
                popf{l|d}                  # Restore original EFLAGS
                " : "=a" (idProbe);
            }
        }
        else version (D_InlineAsm_X86)
        {
            // NOTE: unlike the GNU variant above, this sequence does not
            // reload the EFLAGS value it started with.
            asm nothrow @nogc {
                pushfd;                 // EAX = current EFLAGS
                pop EAX;
                mov idProbe, EAX;       // remember the starting value
                xor EAX, 0x0020_0000;   // invert the ID flag
                push EAX;
                popfd;                  // try to load the modified EFLAGS
                pushfd;
                pop EAX;                // EAX = EFLAGS as the CPU accepted them
                xor idProbe, EAX;       // idProbe = bits that differ from the start
            }
        }
        return (idProbe & EFLAGS_ID) != 0;
    }
}
1012 
1013 } else { // supported X86
1014 
hasCPUID()1015     bool hasCPUID() { return false; }
1016 
cpuidX86()1017     void cpuidX86()
1018     {
1019             datacache[0].size = 8;
1020             datacache[0].associativity = 2;
1021             datacache[0].lineSize = 32;
1022     }
1023 }
1024 
1025 /*
1026 // TODO: Implement this function with OS support
1027 void cpuidPPC()
1028 {
1029     enum :int  { PPC601, PPC603, PPC603E, PPC604,
1030                  PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1031 
1032     // TODO:
1033     // asm { mfpvr; } returns the CPU version but unfortunately it can
1034     // only be used in kernel mode. So OS support is required.
1035     int cputype = PPC603;
1036 
    // 601 has an 8KB combined data & code L1 cache.
1038     uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1039     ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
1040     uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
1041     uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];
1042 
1043     datacache[0].size = sizes[cputype];
1044     datacache[0].associativity = ways[cputype];
1045     datacache[0].lineSize = (cputype==PPCG5)? 128 :
1046         (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1047     datacache[1].size = L2size[cputype];
1048     datacache[2].size = L3size[cputype];
1049     datacache[1].lineSize = datacache[0].lineSize;
1050     datacache[2].lineSize = datacache[0].lineSize;
1051 }
1052 
1053 // TODO: Implement this function with OS support
1054 void cpuidSparc()
1055 {
    // UltraSparcIIi : L1 = 16,  2way. L2 = 512, 4 way.
1057     // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
1058     // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
1059     // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
1060     // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
1061     // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
1062 }
1063 */
1064 
// Module constructor: runs the CPU detection once, fills in guessed values
// for any cache level that was not detected, and then publishes the results
// in the module-level variables.
shared static this()
{
    auto cpu = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86.
    // Probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    // No first-level data cache information: guess same as Pentium 1.
    if (datacache[0].size == 0)
    {
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Count the levels that were detected; every level that is still empty
    // is set equal to the full address space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        datacache[level].size = size_t.max / 1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level - 1].lineSize;
    }

    // Snapshot of the feature words gathered by the detection code.
    const stdBits     = cpu.features;
    const miscBits    = cpu.miscfeatures;
    const extBits     = cpu.extfeatures;
    const amdBits     = cpu.amdfeatures;
    const amdMiscBits = cpu.amdmiscfeatures;

    // Set the immortals

    _dataCaches = datacache;
    _vendor     = cast(string) cpu.vendorID;
    _processor  = cpu.processorName;

    // Flags taken from `features`.
    _x87onChip    = (stdBits & FPU_BIT) != 0;
    _mmx          = (stdBits & MMX_BIT) != 0;
    _sse          = (stdBits & SSE_BIT) != 0;
    _sse2         = (stdBits & SSE2_BIT) != 0;
    _hasFxsr      = (stdBits & FXSR_BIT) != 0;
    _hasCmov      = (stdBits & CMOV_BIT) != 0;
    _hasRdtsc     = (stdBits & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b = (stdBits & CMPXCHG8B_BIT) != 0;
    _isItanium    = (stdBits & IA64_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
    // (REF: www.geoffchappell.com).
    const bool unreliableSysEnter = cpu.probablyIntel
        && (family < 6 || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !unreliableSysEnter && (stdBits & SYSENTERSYSEXIT_BIT) != 0;

    // Flags taken from `miscfeatures`.
    _sse3          = (miscBits & SSE3_BIT) != 0;
    _ssse3         = (miscBits & SSSE3_BIT) != 0;
    _sse41         = (miscBits & SSE41_BIT) != 0;
    _sse42         = (miscBits & SSE42_BIT) != 0;
    _aes           = (miscBits & AES_BIT) != 0;
    _hasPclmulqdq  = (miscBits & PCLMULQDQ_BIT) != 0;
    _hasRdrand     = (miscBits & RDRAND_BIT) != 0;
    _hasCmpxchg16b = (miscBits & CMPXCHG16B_BIT) != 0;
    _hasPopcnt     = (miscBits & POPCNT_BIT) != 0;

    // AVX needs the CPU flag plus both the SSE and YMM bits in `xfeatures`.
    // The flags derived from it must be assigned after _avx, _aes and
    // _hasPclmulqdq, because they are read back through avx/aes/hasPclmulqdq.
    enum avxStateMask = XF_SSE_BIT | XF_YMM_BIT;
    _avx = (cpu.xfeatures & avxStateMask) == avxStateMask && (miscBits & AVX_BIT) != 0;

    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (miscBits & FMA_BIT) != 0;
    _fp16c         = avx && (miscBits & FP16C_BIT) != 0;
    _avx2          = avx && (extBits & AVX2_BIT) != 0;

    // Flags taken from `extfeatures`.
    _hle       = (extBits & HLE_BIT) != 0;
    _rtm       = (extBits & RTM_BIT) != 0;
    _hasRdseed = (extBits & RDSEED_BIT) != 0;
    _hasSha    = (extBits & SHA_BIT) != 0;

    // Flags taken from `amdfeatures`.
    _amd3dnow    = (amdBits & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (amdBits & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (amdBits & AMD_MMX_BIT) != 0;
    _isX86_64    = (amdBits & AMD64_BIT) != 0;

    // Flags taken from `amdmiscfeatures`.
    _sse4a            = (amdMiscBits & SSE4A_BIT) != 0;
    _has3dnowPrefetch = (amdMiscBits & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (amdMiscBits & LAHFSAHF_BIT) != 0;
    _hasLzcnt         = (amdMiscBits & LZCNT_BIT) != 0;

    // Core and thread counts.
    _hyperThreading = cpu.maxThreads > cpu.maxCores;
    _threadsPerCPU  = cpu.maxThreads;
    _coresPerCPU    = cpu.maxCores;

    // Optimisation hints derived from vendor, family and model.
    _preferAthlon   = cpu.probablyAMD && family >= 6;
    _preferPentium4 = cpu.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family == 6 && model < 0xF && !cpu.probablyIntel);
}
1151