1 /**
2 * Identify the characteristics of the host CPU, providing information
3 * about cache sizes and assembly optimisation hints. This module is
4 * provided primarily for assembly language programmers.
5 *
6 * References:
7 * Some of this information was extremely difficult to track down. Some of the
8 * documents below were found only in cached versions stored by search engines!
9 * This code relies on information found in:
10 *
11 * $(UL
12 * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13 * Volume 2A: Instruction Set Reference, A-M" (2007).
14 * )
15 * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16 * )
17 * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18 * Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19 * )
20 * $(LI "AMD Geode(TM) GX Processors Data Book",
21 * Advanced Micro Devices, Publication ID 31505E, (2005).
22 * )
23 * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24 * )
25 * $(LI "Application note 106: Software Customization for the 6x86 Family",
26 * Cyrix Corporation, Rev 1.5 (1998)
27 * )
28 * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29 * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30 * National Semiconductor, (2002)
31 * )
32 * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33 * )
34 * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35 * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36 * $(LI "What every programmer should know about memory",
37 * Ulrich Drepper, Red Hat, Inc., (2007).
38 * )
39 * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40 * $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41 * )
42 * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43 * Note 485" (2009).
44 * )
45 * )
46 *
47 * Bugs: Currently only works on x86 and Itanium CPUs.
48 * Many processors have bugs in their microcode for the CPUID instruction,
49 * so sometimes the cache information may be incorrect.
50 *
51 * Copyright: Copyright Don Clugston 2007 - 2009.
52 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53 * Authors: Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54 * Source: $(DRUNTIMESRC core/_cpuid.d)
55 */
56
57 module core.cpuid;
58
59 @trusted:
60 nothrow:
61 @nogc:
62
63 // If optimizing for a particular processor, it is generally better
64 // to identify based on features rather than model. NOTE: Normally
65 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
66 // with a backup for other CPUs.
67 // Pentium -- preferPentium1()
68 // PMMX -- + mmx()
69 // PPro -- default
70 // PII -- + mmx()
71 // PIII -- + mmx() + sse()
72 // PentiumM -- + mmx() + sse() + sse2()
73 // Pentium4 -- preferPentium4()
74 // PentiumD -- + isX86_64()
75 // Core2 -- default + isX86_64()
76 // AMD K5 -- preferPentium1()
77 // AMD K6 -- + mmx()
78 // AMD K6-II -- + mmx() + 3dnow()
79 // AMD K7 -- preferAthlon()
80 // AMD K8 -- + sse2()
81 // AMD K10 -- + isX86_64()
82 // Cyrix 6x86 -- preferPentium1()
83 // 6x86MX -- + mmx()
84
85 // GDC support uses extended inline assembly:
86 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html (general information and hints)
87 // https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html (binding variables to registers)
88 // https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
89
90 public:
91
/// Cache size and behaviour.
///
/// Describes a single cache level: its capacity, its associativity and
/// the length of one cache line.
struct CacheInfo
{
    /// Size of the cache, in kilobytes, per CPU.
    /// For L1 unified (data + code) caches, this size is half the physical size.
    /// (we don't halve it for larger sizes, since normally
    /// data size is much greater than code size for critical loops).
    size_t size;
    /// Number of ways of associativity, eg:
    /// $(UL
    /// $(LI 1 = direct mapped)
    /// $(LI 2 = 2-way set associative)
    /// $(LI 3 = 3-way set associative)
    /// $(LI ubyte.max = fully associative)
    /// )
    ubyte associativity;
    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;
}
111
public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    // This is the mutable table that the CPUID probing routines in this
    // module write into; index 0 holds the first-level data cache.
    __gshared CacheInfo[5] datacache;
116
// Read-only query properties. Each one simply returns the matching private
// immutable declared further down in this module.
@property pure
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels are set to size_t.max (== entire memory space)
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor() {return _vendor;}
    /// Returns processor string, for display purposes only
    string processor() {return _processor;}

    /// Does it have an x87 FPU on-chip?
    bool x87onChip() {return _x87onChip;}
    /// Is MMX supported?
    bool mmx() {return _mmx;}
    /// Is SSE supported?
    bool sse() {return _sse;}
    /// Is SSE2 supported?
    bool sse2() {return _sse2;}
    /// Is SSE3 supported?
    bool sse3() {return _sse3;}
    /// Is SSSE3 supported?
    bool ssse3() {return _ssse3;}
    /// Is SSE4.1 supported?
    bool sse41() {return _sse41;}
    /// Is SSE4.2 supported?
    bool sse42() {return _sse42;}
    /// Is SSE4a supported?
    bool sse4a() {return _sse4a;}
    /// Is AES supported
    bool aes() {return _aes;}
    /// Is pclmulqdq supported
    bool hasPclmulqdq() {return _hasPclmulqdq;}
    /// Is rdrand supported
    bool hasRdrand() {return _hasRdrand;}
    /// Is AVX supported
    bool avx() {return _avx;}
    /// Is VEX-Encoded AES supported
    bool vaes() {return _vaes;}
    /// Is vpclmulqdq supported
    bool hasVpclmulqdq(){return _hasVpclmulqdq; }
    /// Is FMA supported
    bool fma() {return _fma;}
    /// Is FP16C supported
    bool fp16c() {return _fp16c;}
    /// Is AVX2 supported
    bool avx2() {return _avx2;}
    /// Is HLE (hardware lock elision) supported
    bool hle() {return _hle;}
    /// Is RTM (restricted transactional memory) supported
    bool rtm() {return _rtm;}
    /// Is rdseed supported
    bool hasRdseed() {return _hasRdseed;}
    /// Is SHA supported
    bool hasSha() {return _hasSha;}
    /// Is AMD 3DNOW supported?
    bool amd3dnow() {return _amd3dnow;}
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt() {return _amd3dnowExt;}
    /// Are AMD extensions to MMX supported?
    bool amdMmx() {return _amdMmx;}
    /// Is fxsave/fxrstor supported?
    bool hasFxsr() {return _hasFxsr;}
    /// Is cmov supported?
    bool hasCmov() {return _hasCmov;}
    /// Is rdtsc supported?
    bool hasRdtsc() {return _hasRdtsc;}
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b() {return _hasCmpxchg8b;}
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b() {return _hasCmpxchg16b;}
    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch() {return _has3dnowPrefetch;}
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf() {return _hasLahfSahf;}
    /// Is POPCNT supported?
    bool hasPopcnt() {return _hasPopcnt;}
    /// Is LZCNT supported?
    bool hasLzcnt() {return _hasLzcnt;}
    /// Is this an Intel64 or AMD 64?
    bool isX86_64() {return _isX86_64;}

    /// Is this an IA64 (Itanium) processor?
    bool isItanium() { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading() { return _hyperThreading; }
    /// Returns number of threads per CPU
    uint threadsPerCPU() {return _threadsPerCPU;}
    /// Returns number of cores in CPU
    uint coresPerCPU() {return _coresPerCPU;}

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    /// $(LI AMD Athlon (K7, K8, K10). )
    /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
    /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }
    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }
    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}
249
// Backing storage for the public query properties: one immutable per
// property, named with a leading underscore.
private immutable
{
    /* These exist as immutables so that the query property functions can
     * be backwards compatible with code that called them with ().
     * Also, immutables can only be set by the static this().
     */
    const(CacheInfo)[5] _dataCaches;
    string _vendor;
    string _processor;
    bool _x87onChip;
    bool _mmx;
    bool _sse;
    bool _sse2;
    bool _sse3;
    bool _ssse3;
    bool _sse41;
    bool _sse42;
    bool _sse4a;
    bool _aes;
    bool _hasPclmulqdq;
    bool _hasRdrand;
    bool _avx;
    bool _vaes;
    bool _hasVpclmulqdq;
    bool _fma;
    bool _fp16c;
    bool _avx2;
    bool _hle;
    bool _rtm;
    bool _hasRdseed;
    bool _hasSha;
    bool _amd3dnow;
    bool _amd3dnowExt;
    bool _amdMmx;
    bool _hasFxsr;
    bool _hasCmov;
    bool _hasRdtsc;
    bool _hasCmpxchg8b;
    bool _hasCmpxchg16b;
    bool _hasSysEnterSysExit;
    bool _has3dnowPrefetch;
    bool _hasLahfSahf;
    bool _hasPopcnt;
    bool _hasLzcnt;
    bool _isX86_64;
    bool _isItanium;
    bool _hyperThreading;
    uint _threadsPerCPU;
    uint _coresPerCPU;
    bool _preferAthlon;
    bool _preferPentium4;
    bool _preferPentium1;
}
303
__gshared:
    // All these values are set only once, and never subsequently modified.
public:
    /// $(RED Warning: This field will be turned into a property in a future release.)
    ///
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;
    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
    uint numCacheLevels = 1;
    /// The number of cache levels in the CPU.
    /// Returns the current value of $(D numCacheLevels).
    @property uint cacheLevels() { return numCacheLevels; }
316 private:
317
// Raw results collected from the CPUID probes: the vendor and brand strings,
// the individual feature-flag words, and the core/thread counts.
struct CpuFeatures
{
    bool probablyIntel; // true = _probably_ an Intel processor, might be faking
    bool probablyAMD; // true = _probably_ an AMD processor
    string processorName;   // trimmed slice of processorNameBuffer, or a fixed fallback string
    char [12] vendorID;     // three 32-bit registers from CPUID leaf 0
    char [48] processorNameBuffer; // twelve 32-bit registers from leaves 0x8000_0002..4
    uint features = 0;     // mmx, sse, sse2, hyperthreading, etc
    uint miscfeatures = 0; // sse3, etc.
    uint extfeatures = 0;  // HLE, AVX2, RTM, etc.
    uint amdfeatures = 0;  // 3DNow!, mmxext, etc
    uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
    ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK
    uint maxCores = 1;
    uint maxThreads = 1;
}

// The single module-wide instance that the probing routines fill in.
CpuFeatures cpuFeatures;
336
/* Returns the address of the module-wide cpuFeatures record.
 *
 * The function is deliberately kept out of line so the optimizer cannot see
 * where the returned pointer (held in a register by callers) comes from and
 * "optimize it away". Reaching the global data through that pointer means
 * fewer fixups are inserted into the executable image.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    pragma(inline, false);
    CpuFeatures* record = &cpuFeatures;
    return record;
}
347
// True when the HTT flag is set in the CPUID leaf 1 EDX feature word.
// Note that this may indicate multi-core rather than hyperthreading.
@property bool hyperThreadingBit()
{
    immutable uint htt = cpuFeatures.features & HTT_BIT;
    return htt != 0;
}
350
// Bit masks for the individual feature-flag words stored in CpuFeatures.
// Each anonymous enum below corresponds to one CPUID output register.

// feature flags CPUID1_EDX
enum : uint
{
    FPU_BIT = 1,
    TIMESTAMP_BIT = 1<<4, // rdtsc
    MDSR_BIT = 1<<5, // RDMSR/WRMSR
    CMPXCHG8B_BIT = 1<<8,
    SYSENTERSYSEXIT_BIT = 1<<11,
    CMOV_BIT = 1<<15,
    MMX_BIT = 1<<23,
    FXSR_BIT = 1<<24,
    SSE_BIT = 1<<25,
    SSE2_BIT = 1<<26,
    HTT_BIT = 1<<28,
    IA64_BIT = 1<<30
}
// feature flags misc CPUID1_ECX
enum : uint
{
    SSE3_BIT = 1,
    PCLMULQDQ_BIT = 1<<1, // from AVX
    MWAIT_BIT = 1<<3,
    SSSE3_BIT = 1<<9,
    FMA_BIT = 1<<12, // from AVX
    CMPXCHG16B_BIT = 1<<13,
    SSE41_BIT = 1<<19,
    SSE42_BIT = 1<<20,
    POPCNT_BIT = 1<<23,
    AES_BIT = 1<<25, // AES instructions from AVX
    OSXSAVE_BIT = 1<<27, // Used for AVX
    AVX_BIT = 1<<28,
    FP16C_BIT = 1<<29,
    RDRAND_BIT = 1<<30,
}
// Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
enum : uint
{
    FSGSBASE_BIT = 1 << 0,
    BMI1_BIT = 1 << 3,
    HLE_BIT = 1 << 4,
    AVX2_BIT = 1 << 5,
    SMEP_BIT = 1 << 7,
    BMI2_BIT = 1 << 8,
    ERMS_BIT = 1 << 9,
    INVPCID_BIT = 1 << 10,
    RTM_BIT = 1 << 11,
    RDSEED_BIT = 1 << 18,
    SHA_BIT = 1 << 29,
}
// feature flags XFEATURES_ENABLED_MASK
// (the 64-bit value read with xgetbv when OSXSAVE_BIT is set)
enum : ulong
{
    XF_FP_BIT = 0x1,
    XF_SSE_BIT = 0x2,
    XF_YMM_BIT = 0x4,
}
// AMD feature flags CPUID80000001_EDX
enum : uint
{
    AMD_MMX_BIT = 1<<22,
    // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
    FFXSR_BIT = 1<<25,
    PAGE1GB_BIT = 1<<26, // support for 1GB pages
    RDTSCP_BIT = 1<<27,
    AMD64_BIT = 1<<29,
    AMD_3DNOW_EXT_BIT = 1<<30,
    AMD_3DNOW_BIT = 1<<31
}
// AMD misc feature flags CPUID80000001_ECX
enum : uint
{
    LAHFSAHF_BIT = 1,
    LZCNT_BIT = 1<<5,
    SSE4A_BIT = 1<<6,
    AMD_3DNOW_PREFETCH_BIT = 1<<8,
}
427
428
// supportedX86 selects, at compile time, between the real CPUID-based
// implementation and the fixed-value fallback further down.
// With GDC the decision is made on the target architecture (X86 / X86_64);
// otherwise it depends on whether D inline assembly for x86 is available.
version (GNU) {
    version (X86)
        enum supportedX86 = true;
    else version (X86_64)
        enum supportedX86 = true;
    else
        enum supportedX86 = false;
} else version (D_InlineAsm_X86) {
    enum supportedX86 = true;
} else version (D_InlineAsm_X86_64) {
    enum supportedX86 = true;
} else {
    enum supportedX86 = false;
}
443
444 static if (supportedX86) {
445 // Note that this code will also work for Itanium in x86 mode.
446
447 __gshared uint max_cpuid, max_extended_cpuid;
448
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and decodes every one-byte descriptor it returns.
// Descriptors that describe a data (or unified) cache update the matching
// entry of datacache (index 0 = L1, 1 = L2, 2 = L3); all others are ignored.
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // The three tables below are parallel: ids[i] has size sizes[i] (KB)
        // and associativity ways[i]. They are grouped by cache level.
        static immutable ubyte [63] ids = [
            // level 1 cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache (25 entries)
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Index of the first level-2 and first level-3 entry in the tables.
        // These must match the group sizes above (9 L1 entries, 29 L2 entries);
        // otherwise the last entry of a group is filed under the next level.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                // Descriptor 0x49 is a level-3 cache on family 0xF model 6.
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // level is a zero-based index, so the L3 slot is 2.
                // Every L3 descriptor, plus the listed L1/L2 ones, has 64-byte lines.
                if (level == 2 || x==0x2C || x==0x0D || x==0x0E || (x>=0x48 && x<=0x80)
                    || x==0x86 || x==0x87
                    || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            // Each register packs up to four one-byte descriptors.
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
549
// CPUID4: "Deterministic cache parameters" leaf
//
// Enumerates the sub-leaves of CPUID leaf 4 until one reports cache type 0.
// For every data or unified cache it fills in datacache[level-1] and, as a
// side effect, raises cpuFeatures.maxCores to the largest core count seen.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        immutable uint cachetype = a & 0x1F; // 0 = none, 1 = data, 3 = unified
        if (cachetype==0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF) + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cachetype!=1 && cachetype!=3) continue; // we only want data & unified caches

        ++number_of_sets; // the register holds (sets - 1)
        // Zero-based level; a reported level of 0 wraps to 255 and is rejected below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Valid indices are 0 .. datacache.length-1, so the comparison must be >=.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
            (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cachetype==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
591
// CPUID8000_0005 & 6
//
// Reads the AMD-style cache descriptors: leaf 0x8000_0005 for the L1 data
// cache and, when available, leaf 0x8000_0006 for L2 and L3. Also raises
// cpuFeatures.maxCores from leaf 0x8000_0008 when that leaf exists.
void getAMDcacheinfo()
{
    uint dummy, c5, c6, d6;
    version (GNU) asm pure nothrow @nogc {
        "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
    } else asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as a uint: the low byte of ECX holds (count - 1), so a value
        // of 0xFF must become 256 rather than wrapping to 0, which would
        // make the L3 division below divide by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            uint c8;
            version (GNU) asm pure nothrow @nogc {
                "cpuid" : "=a" (dummy), "=c" (c8) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c8, ECX;
            }
            numcores = (c8 & 0xFF) + 1;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }

        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
        } else asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit encoded associativity field to a number of ways.
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
646
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Walks the sub-leaves of CPUID leaf 0x0B until one returns EAX == 0 and
// EBX == 0. Sub-leaf 0 supplies the threads-per-core count and sub-leaf 1
// the total thread count; cpuFeatures.maxThreads and cpuFeatures.maxCores
// are derived from those two values.
void getCpuInfo0B()
{
    int level=0;
    int threadsPerCore;
    uint a, b, c, d;
    do {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b!=0) {
            // I'm not sure about this. The docs state that there
            // are 2 hyperthreads per core if HT is factory enabled.
            if (level==0)
                threadsPerCore = b & 0xFFFF;
            else if (level==1) {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore is still 0 if sub-leaf 0 reported nothing
                // usable; leave maxCores untouched rather than divide by zero.
                if (threadsPerCore != 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }

        }
        ++level;
    } while (a!=0 || b!=0);
}
680
// Probes the processor with CPUID (and xgetbv when available) and records
// the results in the module-level state:
//   - max_cpuid / max_extended_cpuid
//   - cpuFeatures (vendor ID, feature words, brand string, core/thread counts)
//   - stepping, model, family
//   - datacache (via the getcacheinfo* / getAMDcacheinfo helpers)
void cpuidX86()
{
    auto cf = getCpuFeatures();

    uint a, b, c, d;
    uint* venptr = cast(uint*)cf.vendorID.ptr;
    // Leaf 0: highest standard leaf and the 12-byte vendor string (EBX, EDX, ECX).
    // Leaf 0x8000_0000: highest extended leaf.
    version (GNU)
    {
        asm pure nothrow @nogc {
            "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
            "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
        }
    }
    else
    {
        uint a2;
        version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov EAX, venptr;
                mov [EAX], EBX;
                mov [EAX + 4], EDX;
                mov [EAX + 8], ECX;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov RAX, venptr;
                mov [RAX], EBX;
                mov [RAX + 4], EDX;
                mov [RAX + 8], ECX;
            }
        }
        asm pure nothrow @nogc {
            mov EAX, 0x8000_0000;
            cpuid;
            mov a2, EAX;
        }
        max_cpuid = a;
        max_extended_cpuid = a2;
    }


    cf.probablyIntel = cf.vendorID == "GenuineIntel";
    cf.probablyAMD = cf.vendorID == "AuthenticAMD";
    uint apic = 0; // brand index, apic id
    // Leaf 1: family/model/stepping in EAX, feature words in ECX and EDX.
    version (GNU) asm pure nothrow @nogc {
        "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
    } else {
        asm pure nothrow @nogc {
            mov EAX, 1; // model, stepping
            cpuid;
            mov a, EAX;
            mov apic, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        cf.features = d;
        cf.miscfeatures = c;
    }
    stepping = a & 0xF;
    immutable uint fbase = (a >> 8) & 0xF;
    immutable uint mbase = (a >> 4) & 0xF;
    // '&' binds less tightly than '+', so the extended family field has to be
    // masked inside its own parentheses before it is added to the base family.
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((a >> 20) & 0xFF) : fbase;
    model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
        mbase + ((a >> 12) & 0xF0) : mbase;

    if (max_cpuid >= 7)
    {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
        } else {
            uint ext;
            asm pure nothrow @nogc {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
            }
            cf.extfeatures = ext;
        }
    }

    if (cf.miscfeatures & OSXSAVE_BIT)
    {
        // xgetbv with ECX = 0 returns XFEATURES_ENABLED_MASK in EDX:EAX.
        version (GNU) asm pure nothrow @nogc {
            "xgetbv" : "=a" (a), "=d" (d) : "c" (0);
        } else asm pure nothrow @nogc {
            mov ECX, 0;
            xgetbv;
            mov d, EDX;
            mov a, EAX;
        }
        cf.xfeatures = cast(ulong)d << 32 | a;
    }

    cf.amdfeatures = 0;
    cf.amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        version (GNU) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
        } else {
            asm pure nothrow @nogc {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
            cf.amdmiscfeatures = c;
            cf.amdfeatures = d;
        }
    }
    // Try to detect fraudulent vendorIDs.
    // The raw feature word is tested here instead of the amd3dnow property:
    // per the comment on this module's private immutables, the immutable
    // behind that property is only set by the static constructor, so it
    // cannot yet reflect the value read just above.
    if ((cf.amdfeatures & AMD_3DNOW_BIT) != 0) cf.probablyIntel = false;

    if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        //http://support.amd.com/TechDocs/25481.pdf pg.36
        cf.maxCores = 1;
        if (hyperThreadingBit) {
            // determine max number of cores for AMD
            version (GNU) asm pure nothrow @nogc {
                "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov c, ECX;
            }
            cf.maxCores += c & 0xFF;
        }
    }

    if (max_extended_cpuid >= 0x8000_0004) {
        // Leaves 0x8000_0002..4 each return 16 bytes of the 48-byte brand string.
        uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
        version (GNU)
        {
            asm pure nothrow @nogc {
                "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
                "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
                "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
            }
        }
        else version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc {
                push ESI;
                mov ESI, pnb;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [ESI], EAX;
                mov [ESI+4], EBX;
                mov [ESI+8], ECX;
                mov [ESI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [ESI+16], EAX;
                mov [ESI+20], EBX;
                mov [ESI+24], ECX;
                mov [ESI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [ESI+32], EAX;
                mov [ESI+36], EBX;
                mov [ESI+40], ECX;
                mov [ESI+44], EDX;
                pop ESI;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc {
                push RSI;
                mov RSI, pnb;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [RSI], EAX;
                mov [RSI+4], EBX;
                mov [RSI+8], ECX;
                mov [RSI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [RSI+16], EAX;
                mov [RSI+20], EBX;
                mov [RSI+24], ECX;
                mov [RSI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [RSI+32], EAX;
                mov [RSI+36], EBX;
                mov [RSI+40], ECX;
                mov [RSI+44], EDX;
                pop RSI;
            }
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that a buffer consisting only of spaces
        // or only of nulls yields an empty name instead of indexing outside
        // the buffer.
        immutable size_t len = cf.processorNameBuffer.length;
        size_t start = 0, end = 0;
        while (start < len && cf.processorNameBuffer[start] == ' ') { ++start; }
        while (end < len - start && cf.processorNameBuffer[len-end-1] == 0) { ++end; }
        cf.processorName = cast(string)(cf.processorNameBuffer[start..len-end]);
    } else {
        cf.processorName = "Unknown CPU";
    }
    // Determine cache sizes

    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method

    // Deal with Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    datacache[0].size = 0;
    if (max_cpuid<2 || !cf.probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (cf.probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        // The MMX flag is read from the raw feature word for the same reason
        // as the 3DNow! test above.
        if ((cf.features & MMX_BIT) != 0) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
            // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (cf.probablyIntel && max_cpuid >= 0x0B) {
        // For Intel i7 and later, use function 0x0B to determine
        // cores and hyperthreads.
        getCpuInfo0B();
    } else {
        if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
        else cf.maxThreads = cf.maxCores;

        if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
            version (GNU) asm pure nothrow @nogc {
                "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
            } else {
                asm pure nothrow @nogc {
                    mov EAX, 0x8000_001e;
                    cpuid;
                    mov b, EBX;
                }
            }
            ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
            cf.maxCores = cf.maxThreads / coresPerComputeUnit;
        }
    }
}
967
// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
//
// On X86_64 the answer is always true. On 32-bit targets the function tries
// to toggle bit 21 (0x0020_0000, the ID flag) of EFLAGS and reports whether
// the change was retained.
bool hasCPUID()
{
    version (X86_64)
        return true;
    else
    {
        uint flags;
        version (GNU)
        {
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            // ASM template supports both AT&T and Intel syntax.
            asm nothrow @nogc { "
                pushf{l|d} # Save EFLAGS
                pushf{l|d} # Store EFLAGS
                xor{l $0x00200000, (%%esp)| dword ptr [esp], 0x00200000}
                # Invert the ID bit in stored EFLAGS
                popf{l|d} # Load stored EFLAGS (with ID bit inverted)
                pushf{l|d} # Store EFLAGS again (ID bit may or may not be inverted)
                pop {%%}eax # eax = modified EFLAGS (ID bit may or may not be inverted)
                xor {(%%esp), %%eax|eax, [esp]}
                # eax = whichever bits were changed
                popf{l|d} # Restore original EFLAGS
                " : "=a" (flags);
            }
        }
        else version (D_InlineAsm_X86)
        {
            asm nothrow @nogc {
                pushfd;                 // read the current EFLAGS ...
                pop EAX;
                mov flags, EAX;         // ... and remember them
                xor EAX, 0x0020_0000;   // flip the ID bit
                push EAX;
                popfd;                  // try to write the modified value back
                pushfd;                 // read EFLAGS again
                pop EAX;
                xor flags, EAX;         // flags = bits that actually changed
            }
        }
        // The ID bit changed only if the processor let us toggle it.
        return (flags & 0x0020_0000) != 0;
    }
}
1012
1013 } else { // supported X86
1014
// Stub used when the inline-assembly implementation above is not compiled
// in: CPUID is always reported as unavailable.
bool hasCPUID()
{
    return false;
}
1016
// Stub counterpart of the real CPUID query: performs no detection and simply
// records fixed values (size 8, associativity 2, line size 32) for the
// first data cache level.
void cpuidX86()
{
    with (datacache[0])
    {
        size = 8;
        associativity = 2;
        lineSize = 32;
    }
}
1023 }
1024
1025 /*
1026 // TODO: Implement this function with OS support
1027 void cpuidPPC()
1028 {
1029 enum :int { PPC601, PPC603, PPC603E, PPC604,
1030 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1031
1032 // TODO:
1033 // asm { mfpvr; } returns the CPU version but unfortunately it can
1034 // only be used in kernel mode. So OS support is required.
1035 int cputype = PPC603;
1036
    // 601 has an 8KB combined data & code L1 cache.
1038 uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1039 ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8];
1040 uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512];
1041 uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0];
1042
1043 datacache[0].size = sizes[cputype];
1044 datacache[0].associativity = ways[cputype];
1045 datacache[0].lineSize = (cputype==PPCG5)? 128 :
1046 (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1047 datacache[1].size = L2size[cputype];
1048 datacache[2].size = L3size[cputype];
1049 datacache[1].lineSize = datacache[0].lineSize;
1050 datacache[2].lineSize = datacache[0].lineSize;
1051 }
1052
1053 // TODO: Implement this function with OS support
1054 void cpuidSparc()
1055 {
    // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
1057 // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
1058 // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
1059 // UltraSparcIV : L1 = 64, 4way. L2 = 16*1024.
1060 // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
1061 // Sparc64V : L1 = 128, 2way. L2 = 4096 4way.
1062 }
1063 */
1064
// Module constructor: runs the CPU detection once, normalises the data cache
// table, and publishes every detected property into the module's
// underscore-prefixed variables.
shared static this()
{
    auto cf = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86.
    // Probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    // Nothing established an L1 data cache size: guess same as Pentium 1.
    if (datacache[0].size == 0)
    {
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Level 0 always counts; every further level that reports a size adds one.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        // Unused level: make it equal to the full address space and inherit
        // the line size of the level before it.
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level - 1].lineSize;
    }

    // Set the immortals

    // Snapshot the feature words. This must happen after cpuidX86() has run,
    // because `cf` was obtained before the detection filled it in.
    const stdFeat     = cf.features;
    const miscFeat    = cf.miscfeatures;
    const extFeat     = cf.extfeatures;
    const amdFeat     = cf.amdfeatures;
    const amdMiscFeat = cf.amdmiscfeatures;

    _dataCaches = datacache;
    _vendor     = cast(string)cf.vendorID;
    _processor  = cf.processorName;

    _x87onChip     = (stdFeat & FPU_BIT) != 0;
    _mmx           = (stdFeat & MMX_BIT) != 0;
    _sse           = (stdFeat & SSE_BIT) != 0;
    _sse2          = (stdFeat & SSE2_BIT) != 0;
    _sse3          = (miscFeat & SSE3_BIT) != 0;
    _ssse3         = (miscFeat & SSSE3_BIT) != 0;
    _sse41         = (miscFeat & SSE41_BIT) != 0;
    _sse42         = (miscFeat & SSE42_BIT) != 0;
    _sse4a         = (amdMiscFeat & SSE4A_BIT) != 0;
    _aes           = (miscFeat & AES_BIT) != 0;
    _hasPclmulqdq  = (miscFeat & PCLMULQDQ_BIT) != 0;
    _hasRdrand     = (miscFeat & RDRAND_BIT) != 0;

    // AVX needs both state bits set in xfeatures as well as the AVX feature bit.
    enum avxStateMask = XF_SSE_BIT | XF_YMM_BIT;
    _avx = (cf.xfeatures & avxStateMask) == avxStateMask
        && (miscFeat & AVX_BIT) != 0;

    // Everything below that is gated on `avx` relies on _avx assigned above.
    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (miscFeat & FMA_BIT) != 0;
    _fp16c         = avx && (miscFeat & FP16C_BIT) != 0;
    _avx2          = avx && (extFeat & AVX2_BIT) != 0;

    _hle           = (extFeat & HLE_BIT) != 0;
    _rtm           = (extFeat & RTM_BIT) != 0;
    _hasRdseed     = (extFeat & RDSEED_BIT) != 0;
    _hasSha        = (extFeat & SHA_BIT) != 0;

    _amd3dnow      = (amdFeat & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt   = (amdFeat & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx        = (amdFeat & AMD_MMX_BIT) != 0;

    _hasFxsr       = (stdFeat & FXSR_BIT) != 0;
    _hasCmov       = (stdFeat & CMOV_BIT) != 0;
    _hasRdtsc      = (stdFeat & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b  = (stdFeat & CMPXCHG8B_BIT) != 0;
    _hasCmpxchg16b = (miscFeat & CMPXCHG16B_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
    // (REF: www.geoffchappell.com).
    const buggySysEnter = cf.probablyIntel
        && (family < 6
            || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !buggySysEnter
        && (stdFeat & SYSENTERSYSEXIT_BIT) != 0;

    _has3dnowPrefetch = (amdMiscFeat & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (amdMiscFeat & LAHFSAHF_BIT) != 0;
    _hasPopcnt        = (miscFeat & POPCNT_BIT) != 0;
    _hasLzcnt         = (amdMiscFeat & LZCNT_BIT) != 0;
    _isX86_64         = (amdFeat & AMD64_BIT) != 0;
    _isItanium        = (stdFeat & IA64_BIT) != 0;

    // Thread and core topology.
    _hyperThreading = cf.maxThreads > cf.maxCores;
    _threadsPerCPU  = cf.maxThreads;
    _coresPerCPU    = cf.maxCores;

    // Optimisation hints derived from vendor, family and model.
    _preferAthlon   = cf.probablyAMD && family >= 6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6
        || (family == 6 && model < 0xF && !cf.probablyIntel);
}
1151