1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
4 /* */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
8 /* */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
12 /* */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
32 /* */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
38
39 #include <stdio.h>
40 #include <string.h>
41 #include "cpuid.h"
42
43 #if defined(_MSC_VER) && !defined(__clang__)
44 #define C_INLINE __inline
45 #else
46 #define C_INLINE inline
47 #endif
48
49 /*
50 #ifdef NO_AVX
51 #define CPUTYPE_HASWELL CPUTYPE_NEHALEM
52 #define CORE_HASWELL CORE_NEHALEM
53 #define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
54 #define CORE_SKYLAKEX CORE_NEHALEM
55 #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
56 #define CORE_SANDYBRIDGE CORE_NEHALEM
57 #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
58 #define CORE_BULLDOZER CORE_BARCELONA
59 #define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
60 #define CORE_PILEDRIVER CORE_BARCELONA
61 #endif
62 */
63
64 #if defined(_MSC_VER) && !defined(__clang__)
65
/*
 * MSVC build: execute CPUID for leaf `op` via the __cpuid intrinsic and
 * return the four result registers through the output pointers.
 *
 *   op                  - CPUID leaf to query
 *   eax, ebx, ecx, edx  - receive slots 0..3 of the intrinsic's result array
 */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
  /* Slot 0 starts at -1 and the other slots at 0 until __cpuid fills them. */
  int regs[4] = {-1};

  __cpuid(regs, op);

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
}
75
/*
 * MSVC build: execute CPUID for leaf `op`, sub-leaf `count`, via the
 * __cpuidex intrinsic and return the four result registers through the
 * output pointers.
 *
 *   op                  - CPUID leaf to query
 *   count               - sub-leaf index passed to the intrinsic
 *   eax, ebx, ecx, edx  - receive slots 0..3 of the intrinsic's result array
 */
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
  /* Slot 0 starts at -1 and the other slots at 0 until __cpuidex fills them. */
  int regs[4] = {-1};

  __cpuidex(regs, op, count);

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
}
85
86 #else
87
88 #ifndef CPUIDEMU
89
90 #if defined(__APPLE__) && defined(__i386__)
91 void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
92 void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
93 #else
cpuid(int op,int * eax,int * ebx,int * ecx,int * edx)94 static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
95 #if defined(__i386__) && defined(__PIC__)
96 __asm__ __volatile__
97 ("mov %%ebx, %%edi;"
98 "cpuid;"
99 "xchgl %%ebx, %%edi;"
100 : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
101 #else
102 __asm__ __volatile__
103 ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
104 #endif
105 }
106
cpuid_count(int op,int count,int * eax,int * ebx,int * ecx,int * edx)107 static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
108 #if defined(__i386__) && defined(__PIC__)
109 __asm__ __volatile__
110 ("mov %%ebx, %%edi;"
111 "cpuid;"
112 "xchgl %%ebx, %%edi;"
113 : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
114 #else
115 __asm__ __volatile__
116 ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
117 #endif
118 }
119 #endif
120
121 #else
122
/* One recorded CPUID answer used by the emulation build (CPUIDEMU). */
typedef struct {
  unsigned int id;   /* leaf number this record is matched against */
  unsigned int a;    /* value handed back as EAX */
  unsigned int b;    /* value handed back as EBX */
  unsigned int c;    /* value handed back as ECX */
  unsigned int d;    /* value handed back as EDX */
} idlist_t;

/* One emulated CPU: two strings plus an inclusive index window into idlist[]. */
typedef struct {
  char *vendor;
  char *name;
  int   start;       /* first idlist[] index belonging to this CPU */
  int   stop;        /* last idlist[] index belonging to this CPU */
} vendor_t;
132
133 extern idlist_t idlist[];
134 extern vendor_t vendor[];
135
136 static int cv = VENDOR;
137
cpuid(unsigned int op,unsigned int * eax,unsigned int * ebx,unsigned int * ecx,unsigned int * edx)138 void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx){
139
140 static int current = 0;
141
142 int start = vendor[cv].start;
143 int stop = vendor[cv].stop;
144 int count = stop - start;
145
146 if ((current < start) || (current > stop)) current = start;
147
148 while ((count > 0) && (idlist[current].id != op)) {
149
150 current ++;
151 if (current > stop) current = start;
152 count --;
153
154 }
155
156 *eax = idlist[current].a;
157 *ebx = idlist[current].b;
158 *ecx = idlist[current].c;
159 *edx = idlist[current].d;
160 }
161
/*
 * Emulated CPUID with a sub-leaf argument.  The emulation tables carry no
 * sub-leaf information, so `count` is ignored and the call is forwarded to
 * cpuid() for the same leaf.
 */
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
  (void)count;   /* intentionally unused */

  /* Plain call: `return <void expression>;` is not valid ISO C in a void function. */
  cpuid (op, eax, ebx, ecx, edx);
}
165
166 #endif
167
168 #endif // _MSC_VER
169
have_cpuid(void)170 static C_INLINE int have_cpuid(void){
171 int eax, ebx, ecx, edx;
172
173 cpuid(0, &eax, &ebx, &ecx, &edx);
174 return eax;
175 }
176
have_excpuid(void)177 static C_INLINE int have_excpuid(void){
178 int eax, ebx, ecx, edx;
179
180 cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
181 return eax & 0xffff;
182 }
183
184 #ifndef NO_AVX
xgetbv(int op,int * eax,int * edx)185 static C_INLINE void xgetbv(int op, int * eax, int * edx){
186 //Use binary code for xgetbv
187 #if defined(_MSC_VER) && !defined(__clang__)
188 *eax = __xgetbv(op);
189 #else
190 __asm__ __volatile__
191 (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
192 #endif
193 }
194 #endif
195
/*
 * Return 1 when AVX can be used, 0 otherwise (always 0 when built with
 * NO_AVX).
 *
 * Requires CPUID.1:ECX bits 26, 27 and 28 (XSAVE, OSXSAVE and AVX in the
 * Intel SDM layout) and XCR0 bits 1 and 2, which indicate that the OS
 * saves the xmm and ymm registers.
 */
int support_avx(){
#ifndef NO_AVX
  const int required = (1 << 28) | (1 << 27) | (1 << 26);
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  if ((ecx & required) != required)
    return 0;

  xgetbv(0, &eax, &edx);

  /* 6 = (1<<1) | (1<<2): OS supports saving xmm and ymm registers */
  return ((eax & 6) == 6) ? 1 : 0;
#else
  return 0;
#endif
}
213
/*
 * Return 1 when AVX2 can be used, 0 otherwise (always 0 when built with
 * NO_AVX2).  AVX support is a precondition; the AVX2 flag itself is
 * bit 5 of EBX from CPUID leaf 7.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPU supports AVX2 when the bit is set */
  return ((ebx & (1<<5)) != 0) ? 1 : 0;
#else
  return 0;
#endif
}
229
/*
 * Return 1 when AVX-512 can be used, 0 otherwise (always 0 when built with
 * NO_AVX or NO_AVX512).
 *
 * Conditions, all taken from CPUID leaf 7 EBX and XCR0:
 *   - support_avx() succeeds,
 *   - bit 5  (AVX2) is set,
 *   - bit 31 (AVX512VL) is set,
 *   - XCR0 bits 5..7 (mask 0xe0) are set, i.e. the OS saves zmm state.
 */
int support_avx512(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* No AVX2 flag: give up now.  The old code only set ret=0 here, which
   * the AVX512VL branch below could overwrite again. */
  if ((ebx & (1u << 5)) == 0)
    return 0;

  /* AVX512VL flag.  Unsigned constant: 1 << 31 overflows a signed int. */
  if ((ebx & (1u << 31)) == 0)
    return 0;

  xgetbv(0, &eax, &edx);

  /* OS supports saving zmm registers */
  return ((eax & 0xe0) == 0xe0) ? 1 : 0;
#else
  return 0;
#endif
}
251
/*
 * Return 1 when AVX512_BF16 can be used, 0 otherwise (always 0 when built
 * with NO_AVX or NO_AVX512).  Requires support_avx512() and bit 5 of EAX
 * from CPUID leaf 7, sub-leaf 1.
 */
int support_avx512_bf16(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);

  /* CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 is supported (32 == 1 << 5) */
  return ((eax & 32) == 32) ? 1 : 0;
#else
  return 0;
#endif
}
268
get_vendor(void)269 int get_vendor(void){
270 int eax, ebx, ecx, edx;
271 char vendor[13];
272
273 cpuid(0, &eax, &ebx, &ecx, &edx);
274
275 *(int *)(&vendor[0]) = ebx;
276 *(int *)(&vendor[4]) = edx;
277 *(int *)(&vendor[8]) = ecx;
278 vendor[12] = (char)0;
279
280 if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
281 if (!strcmp(vendor, " UMC UMC UMC")) return VENDOR_UMC;
282 if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
283 if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX;
284 if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN;
285 if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR;
286 if (!strcmp(vendor, " Shanghai ")) return VENDOR_ZHAOXIN;
287 if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE;
288 if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS;
289 if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA;
290 if (!strcmp(vendor, "Geode by NSC")) return VENDOR_NSC;
291 if (!strcmp(vendor, "HygonGenuine")) return VENDOR_HYGON;
292
293 if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
294
295 return VENDOR_UNKNOWN;
296 }
297
get_cputype(int gettype)298 int get_cputype(int gettype){
299 int eax, ebx, ecx, edx;
300 int extend_family, family;
301 int extend_model, model;
302 int type, stepping;
303 int feature = 0;
304
305 cpuid(1, &eax, &ebx, &ecx, &edx);
306
307 switch (gettype) {
308 case GET_EXFAMILY :
309 return BITMASK(eax, 20, 0xff);
310 case GET_EXMODEL :
311 return BITMASK(eax, 16, 0x0f);
312 case GET_TYPE :
313 return BITMASK(eax, 12, 0x03);
314 case GET_FAMILY :
315 return BITMASK(eax, 8, 0x0f);
316 case GET_MODEL :
317 return BITMASK(eax, 4, 0x0f);
318 case GET_APICID :
319 return BITMASK(ebx, 24, 0x0f);
320 case GET_LCOUNT :
321 return BITMASK(ebx, 16, 0x0f);
322 case GET_CHUNKS :
323 return BITMASK(ebx, 8, 0x0f);
324 case GET_STEPPING :
325 return BITMASK(eax, 0, 0x0f);
326 case GET_BLANDID :
327 return BITMASK(ebx, 0, 0xff);
328 case GET_NUMSHARE :
329 if (have_cpuid() < 4) return 0;
330 cpuid(4, &eax, &ebx, &ecx, &edx);
331 return BITMASK(eax, 14, 0xfff);
332 case GET_NUMCORES :
333 if (have_cpuid() < 4) return 0;
334 cpuid(4, &eax, &ebx, &ecx, &edx);
335 return BITMASK(eax, 26, 0x3f);
336
337 case GET_FEATURE :
338 if ((edx & (1 << 3)) != 0) feature |= HAVE_PSE;
339 if ((edx & (1 << 15)) != 0) feature |= HAVE_CMOV;
340 if ((edx & (1 << 19)) != 0) feature |= HAVE_CFLUSH;
341 if ((edx & (1 << 23)) != 0) feature |= HAVE_MMX;
342 if ((edx & (1 << 25)) != 0) feature |= HAVE_SSE;
343 if ((edx & (1 << 26)) != 0) feature |= HAVE_SSE2;
344 if ((edx & (1 << 27)) != 0) {
345 if (BITMASK(ebx, 16, 0x0f) > 0) feature |= HAVE_HIT;
346 }
347 if ((ecx & (1 << 0)) != 0) feature |= HAVE_SSE3;
348 if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3;
349 if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
350 if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
351 #ifndef NO_AVX
352 if (support_avx()) feature |= HAVE_AVX;
353 if (support_avx2()) feature |= HAVE_AVX2;
354 if (support_avx512()) feature |= HAVE_AVX512VL;
355 if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
356 if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
357 #endif
358
359 if (have_excpuid() >= 0x01) {
360 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
361 if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A;
362 if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE;
363 #ifndef NO_AVX
364 if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4;
365 #endif
366 if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
367 if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
368 }
369
370 if (have_excpuid() >= 0x1a) {
371 cpuid(0x8000001a, &eax, &ebx, &ecx, &edx);
372 if ((eax & (1 << 0)) != 0) feature |= HAVE_128BITFPU;
373 if ((eax & (1 << 1)) != 0) feature |= HAVE_FASTMOVU;
374 }
375
376 }
377 return feature;
378 }
379
/* Store size, associativity and line size in *c; `shared` is left untouched. */
static void ci_assign(cache_info_t *c, int size, int associative, int linesize){
  c->size        = size;
  c->associative = associative;
  c->linesize    = linesize;
}

/* Zero the four fields of *c that get_cacheinfo() reports. */
static void ci_reset(cache_info_t *c){
  ci_assign(c, 0, 0, 0);
  c->shared = 0;
}

/* Fill *c from the EBX/ECX values of one CPUID leaf 4 sub-leaf (size in KB). */
static void ci_from_leaf4(cache_info_t *c, int ebx, int ecx){
  unsigned int b = (unsigned int)ebx;
  int sets  = ecx + 1;
  int lines = (int)(b & 0x0fff) + 1;
  int part  = (int)((b >> 12) & 0x03ff) + 1;
  int assoc = (int)((b >> 22) & 0x03ff) + 1;

  ci_assign(c, (assoc * part * lines * sets) / 1024, assoc, lines);
}

/* 8-bit associativity field of leaves 0x80000005/6: 0xff is reported as 0. */
static int ci_assoc8(int field){
  return (field == 0xff) ? 0 : field;
}

/* 4-bit L2 associativity code of leaf 0x80000006: 6 -> 8 ways, 8 -> 16 ways. */
static int ci_l2_ways(int code){
  switch (code) {
  case 0x06: return 8;
  case 0x08: return 16;
  default:   return code;
  }
}

/*
 * Collect cache and TLB parameters of the running CPU and copy the entry
 * selected by `type` (one of the CACHE_INFO_* constants) into *cacheinfo.
 *
 * Sources, in order:
 *   1. CPUID leaf 2 descriptor bytes (when the CPU reports at least 2
 *      basic leaves), decoded through the table below.
 *   2. Intel only: CPUID leaf 4 for the L1 caches when leaf 2 did not
 *      yield both of them, and leaf 0x80000006 for L2 when still unknown.
 *   3. AMD / Hygon / Centaur / Zhaoxin: leaves 0x80000005 and 0x80000006,
 *      which overwrite the L1, TLB and L3 entries and fill L2 when unknown.
 *
 * Always returns 0.  *cacheinfo is left untouched for an unknown `type`.
 *
 * Fixes relative to the previous revision:
 *   - the L3 "fully associative" check wrote to L2 and compared the 4-bit
 *     field against 0xff; the same dead comparison existed for L2;
 *   - the L1 instruction cache was read from ECX (the L1 data cache
 *     register) instead of EDX on the extended-leaf path;
 *   - descriptors 0x48, 0x4c and 0x8d carried mistyped sizes
 *     (3184, 12280, 3096 instead of 3072, 12288, 3072 KB);
 *   - leaf 4 is only queried when the CPU reports it;
 *   - the unused `numcalls` local is gone.
 */
int get_cacheinfo(int type, cache_info_t *cacheinfo){
  int eax, ebx, ecx, edx;
  int basic_level, ext_level;
  int cpu_vendor;
  int info[15];
  int i;
  cache_info_t LC1, LD1, L2, L3,
    ITB, DTB, LITB, LDTB,
    L2ITB, L2DTB, L2LITB, L2LDTB;
  cache_info_t *all[12];

  all[0] = &LC1;   all[1] = &LD1;   all[2]  = &L2;     all[3]  = &L3;
  all[4] = &ITB;   all[5] = &DTB;   all[6]  = &LITB;   all[7]  = &LDTB;
  all[8] = &L2ITB; all[9] = &L2DTB; all[10] = &L2LITB; all[11] = &L2LDTB;
  for (i = 0; i < 12; i++) ci_reset(all[i]);

  cpuid(0, &basic_level, &ebx, &ecx, &edx);

  if (basic_level > 1) {
    int regs[4];
    int r, n = 0;

    /* FIXME some systems may require repeated calls to read all entries
     * (the low byte of EAX, which is skipped below, is not looked at) */
    cpuid(2, &eax, &ebx, &ecx, &edx);
    regs[0] = eax; regs[1] = ebx; regs[2] = ecx; regs[3] = edx;

    /* Split the registers into 15 descriptor bytes: EAX bytes 1..3, then
     * bytes 0..3 of EBX, ECX and EDX. */
    for (r = 0; r < 4; r++) {
      int shift;
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8) {
        info[n++] = BITMASK(regs[r], shift, 0xff);
      }
    }

    for (i = 0; i < 15; i++){
      switch (info[i]){

      /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
      /* Arguments are: size, associativity, line size. */

      case 0x01 : ci_assign(&ITB,  4,     4,  32); break;
      case 0x02 : ci_assign(&LITB, 4096,  0,  2);  break;
      case 0x03 : ci_assign(&DTB,  4,     4,  64); break;
      case 0x04 : ci_assign(&LDTB, 4096,  4,  8);  break;
      case 0x05 : ci_assign(&LDTB, 4096,  4,  32); break;
      case 0x06 : ci_assign(&LC1,  8,     4,  32); break;
      case 0x08 : ci_assign(&LC1,  16,    4,  32); break;
      case 0x09 : ci_assign(&LC1,  32,    4,  64); break;
      case 0x0a : ci_assign(&LD1,  8,     2,  32); break;
      case 0x0c : ci_assign(&LD1,  16,    4,  32); break;
      case 0x0d : ci_assign(&LD1,  16,    4,  64); break;
      case 0x0e : ci_assign(&LD1,  24,    6,  64); break;
      case 0x10 : ci_assign(&LD1,  16,    4,  32); break;
      case 0x15 : ci_assign(&LC1,  16,    4,  32); break;
      case 0x1a : ci_assign(&L2,   96,    6,  64); break;
      case 0x21 : ci_assign(&L2,   256,   8,  64); break;
      case 0x22 : ci_assign(&L3,   512,   4,  64); break;
      case 0x23 : ci_assign(&L3,   1024,  8,  64); break;
      case 0x25 : ci_assign(&L3,   2048,  8,  64); break;
      case 0x29 : ci_assign(&L3,   4096,  8,  64); break;
      case 0x2c : ci_assign(&LD1,  32,    8,  64); break;
      case 0x30 : ci_assign(&LC1,  32,    8,  64); break;
      case 0x39 : ci_assign(&L2,   128,   4,  64); break;
      case 0x3a : ci_assign(&L2,   192,   6,  64); break;
      case 0x3b : ci_assign(&L2,   128,   2,  64); break;
      case 0x3c : ci_assign(&L2,   256,   4,  64); break;
      case 0x3d : ci_assign(&L2,   384,   6,  64); break;
      case 0x3e : ci_assign(&L2,   512,   4,  64); break;
      case 0x41 : ci_assign(&L2,   128,   4,  32); break;
      case 0x42 : ci_assign(&L2,   256,   4,  32); break;
      case 0x43 : ci_assign(&L2,   512,   4,  32); break;
      case 0x44 : ci_assign(&L2,   1024,  4,  32); break;
      case 0x45 : ci_assign(&L2,   2048,  4,  32); break;
      case 0x46 : ci_assign(&L3,   4096,  4,  64); break;
      case 0x47 : ci_assign(&L3,   8192,  8,  64); break;
      case 0x48 : ci_assign(&L2,   3072,  12, 64); break;   /* 3 MB (was 3184) */
      case 0x49 :
        /* Same descriptor means L3 on family 0x0f model 0x06, L2 elsewhere. */
        if ((get_cputype(GET_FAMILY) == 0x0f) && (get_cputype(GET_MODEL) == 0x06)) {
          ci_assign(&L3, 4096, 16, 64);
        } else {
          ci_assign(&L2, 4096, 16, 64);
        }
        break;
      case 0x4a : ci_assign(&L3,   6144,  12, 64); break;
      case 0x4b : ci_assign(&L3,   8192,  16, 64); break;
      case 0x4c : ci_assign(&L3,   12288, 12, 64); break;   /* 12 MB (was 12280) */
      case 0x4d : ci_assign(&L3,   16384, 16, 64); break;
      case 0x4e : ci_assign(&L2,   6144,  24, 64); break;
      case 0x4f : ci_assign(&ITB,  4,     0,  32); break;
      case 0x50 :
        ci_assign(&ITB,  4,    0, 64);
        ci_assign(&LITB, 4096, 0, 64);
        LITB.shared = 1;
        break;
      case 0x51 :
        ci_assign(&ITB,  4,    0, 128);
        ci_assign(&LITB, 4096, 0, 128);
        LITB.shared = 1;
        break;
      case 0x52 :
        ci_assign(&ITB,  4,    0, 256);
        ci_assign(&LITB, 4096, 0, 256);
        LITB.shared = 1;
        break;
      case 0x55 :
        ci_assign(&LITB, 4096, 0, 7);
        LITB.shared = 1;
        break;
      case 0x56 : ci_assign(&LDTB, 4096,  4,  16); break;
      case 0x57 : ci_assign(&LDTB, 4096,  4,  16); break;
      case 0x5b :
        ci_assign(&DTB,  4,    0, 64);
        ci_assign(&LDTB, 4096, 0, 64);
        LDTB.shared = 1;
        break;
      case 0x5c :
        ci_assign(&DTB,  4,    0, 128);
        ci_assign(&LDTB, 4096, 0, 128);
        LDTB.shared = 1;
        break;
      case 0x5d :
        ci_assign(&DTB,  4,    0, 256);
        ci_assign(&LDTB, 4096, 0, 256);
        LDTB.shared = 1;
        break;
      case 0x60 : ci_assign(&LD1,  16,    8,  64); break;
      case 0x63 :
        ci_assign(&DTB,  2048, 4, 32);
        ci_assign(&LDTB, 4096, 4, 32);
        break;
      case 0x66 : ci_assign(&LD1,  8,     4,  64); break;
      case 0x67 : ci_assign(&LD1,  16,    4,  64); break;
      case 0x68 : ci_assign(&LD1,  32,    4,  64); break;
      /* 0x70..0x73 carry no line size: LC1.linesize keeps its current value. */
      case 0x70 : LC1.size = 12; LC1.associative = 8; break;
      case 0x71 : LC1.size = 16; LC1.associative = 8; break;
      case 0x72 : LC1.size = 32; LC1.associative = 8; break;
      case 0x73 : LC1.size = 64; LC1.associative = 8; break;
      case 0x76 :
        ci_assign(&ITB,  2048, 0, 8);
        ci_assign(&LITB, 4096, 0, 8);
        break;
      case 0x77 : ci_assign(&LC1,  16,    4,  64); break;
      case 0x78 : ci_assign(&L2,   1024,  4,  64); break;
      case 0x79 : ci_assign(&L2,   128,   8,  64); break;
      case 0x7a : ci_assign(&L2,   256,   8,  64); break;
      case 0x7b : ci_assign(&L2,   512,   8,  64); break;
      case 0x7c : ci_assign(&L2,   1024,  8,  64); break;
      case 0x7d : ci_assign(&L2,   2048,  8,  64); break;
      case 0x7e : ci_assign(&L2,   256,   8,  128); break;
      case 0x7f : ci_assign(&L2,   512,   2,  64); break;
      case 0x81 : ci_assign(&L2,   128,   8,  32); break;
      case 0x82 : ci_assign(&L2,   256,   8,  32); break;
      case 0x83 : ci_assign(&L2,   512,   8,  32); break;
      case 0x84 : ci_assign(&L2,   1024,  8,  32); break;
      case 0x85 : ci_assign(&L2,   2048,  8,  32); break;
      case 0x86 : ci_assign(&L2,   512,   4,  64); break;
      case 0x87 : ci_assign(&L2,   1024,  8,  64); break;
      case 0x88 : ci_assign(&L3,   2048,  4,  64); break;
      case 0x89 : ci_assign(&L3,   4096,  4,  64); break;
      case 0x8a : ci_assign(&L3,   8192,  4,  64); break;
      case 0x8d : ci_assign(&L3,   3072,  12, 128); break;  /* 3 MB (was 3096) */
      case 0x90 : ci_assign(&ITB,  4,     0,  64); break;
      case 0x96 : ci_assign(&DTB,  4,     0,  32); break;
      case 0x9b : ci_assign(&L2DTB, 4,    0,  96); break;
      case 0xb0 : ci_assign(&ITB,  4,     4,  128); break;
      case 0xb1 : ci_assign(&LITB, 4096,  4,  4);  break;
      case 0xb2 : ci_assign(&ITB,  4,     4,  64); break;
      case 0xb3 : ci_assign(&DTB,  4,     4,  128); break;
      case 0xb4 : ci_assign(&DTB,  4,     4,  256); break;
      case 0xba : ci_assign(&DTB,  4,     4,  64); break;
      case 0xd0 : ci_assign(&L3,   512,   4,  64); break;
      case 0xd1 : ci_assign(&L3,   1024,  4,  64); break;
      case 0xd2 : ci_assign(&L3,   2048,  4,  64); break;
      case 0xd6 : ci_assign(&L3,   1024,  8,  64); break;
      case 0xd7 : ci_assign(&L3,   2048,  8,  64); break;
      case 0xd8 : ci_assign(&L3,   4096,  8,  64); break;
      case 0xdc : ci_assign(&L3,   2048,  12, 64); break;
      case 0xdd : ci_assign(&L3,   4096,  12, 64); break;
      case 0xde : ci_assign(&L3,   8192,  12, 64); break;
      case 0xe2 : ci_assign(&L3,   2048,  16, 64); break;
      case 0xe3 : ci_assign(&L3,   4096,  16, 64); break;
      case 0xe4 : ci_assign(&L3,   8192,  16, 64); break;
      default :
        break;
      }
    }
  }

  cpu_vendor = get_vendor();

  if (cpu_vendor == VENDOR_INTEL) {
    /* If we didn't detect L1 correctly before, walk the first four
     * sub-leaves of CPUID leaf 4 - but only when the CPU reports leaf 4. */
    if ((basic_level >= 4) && (LD1.size <= 0 || LC1.size <= 0)) {
      int count;
      for (count = 0; count < 4; count++) {
        int cache_type, cache_level;

        cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
        cache_type  = eax & 0x1f;
        cache_level = (eax >> 5) & 0x07;

        if (cache_level != 1) continue;

        if (cache_type == 1 || cache_type == 3) {
          /* L1 data (or unified) cache */
          ci_from_leaf4(&LD1, ebx, ecx);
        } else if (cache_type == 2) {
          /* L1 instruction cache */
          ci_from_leaf4(&LC1, ebx, ecx);
        }
      }
    }

    cpuid(0x80000000, &ext_level, &ebx, &ecx, &edx);
    if (((unsigned int)ext_level >= 0x80000006u) && (L2.size <= 0)) {
      /* If we didn't detect L2 correctly before, */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

      L2.size        = BITMASK(ecx, 16, 0xffff);
      L2.associative = ci_l2_ways(BITMASK(ecx, 12, 0x0f));
      L2.linesize    = BITMASK(ecx, 0, 0xff);
    }
  }

  if ((cpu_vendor == VENDOR_AMD) ||
      (cpu_vendor == VENDOR_HYGON) ||
      (cpu_vendor == VENDOR_CENTAUR) ||
      (cpu_vendor == VENDOR_ZHAOXIN)) {

    /* Leaf 0x80000005: EAX/EBX hold the L1 TLBs, ECX the L1 data cache
     * and EDX the L1 instruction cache. */
    cpuid(0x80000005, &eax, &ebx, &ecx, &edx);

    ci_assign(&LDTB, 4096, ci_assoc8(BITMASK(eax, 24, 0xff)), BITMASK(eax, 16, 0xff));
    ci_assign(&LITB, 4096, ci_assoc8(BITMASK(eax, 8, 0xff)),  BITMASK(eax, 0, 0xff));
    ci_assign(&DTB,  4,    ci_assoc8(BITMASK(ebx, 24, 0xff)), BITMASK(ebx, 16, 0xff));
    ci_assign(&ITB,  4,    ci_assoc8(BITMASK(ebx, 8, 0xff)),  BITMASK(ebx, 0, 0xff));

    ci_assign(&LD1, BITMASK(ecx, 24, 0xff), ci_assoc8(BITMASK(ecx, 16, 0xff)), BITMASK(ecx, 0, 0xff));
    /* Instruction cache comes from EDX (ECX was used here by mistake). */
    ci_assign(&LC1, BITMASK(edx, 24, 0xff), ci_assoc8(BITMASK(edx, 16, 0xff)), BITMASK(edx, 0, 0xff));

    /* Leaf 0x80000006: second-level TLBs, L2 and L3. */
    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    ci_assign(&L2LDTB, 4096, ci_assoc8(BITMASK(eax, 24, 0xff)), BITMASK(eax, 16, 0xff));
    ci_assign(&L2LITB, 4096, ci_assoc8(BITMASK(eax, 8, 0xff)),  BITMASK(eax, 0, 0xff));
    ci_assign(&L2DTB,  4,    ci_assoc8(BITMASK(ebx, 24, 0xff)), BITMASK(ebx, 16, 0xff));
    ci_assign(&L2ITB,  4,    ci_assoc8(BITMASK(ebx, 8, 0xff)),  BITMASK(ebx, 0, 0xff));

    if (L2.size <= 0) {
      /* If we didn't detect L2 correctly before, */
      L2.size        = BITMASK(ecx, 16, 0xffff);
      L2.associative = ci_l2_ways(BITMASK(ecx, 12, 0xf));
      /* 4-bit field: the all-ones code is 0x0f, not 0xff. */
      if (L2.associative == 0x0f) L2.associative = 0;
      L2.linesize    = BITMASK(ecx, 0, 0xff);
    }

    L3.size        = BITMASK(edx, 18, 0x3fff) * 512;
    L3.associative = BITMASK(edx, 12, 0xf);
    /* Same 4-bit encoding; this check used to clear L2 instead of L3. */
    if (L3.associative == 0x0f) L3.associative = 0;
    L3.linesize    = BITMASK(edx, 0, 0xff);
  }

  switch (type) {
  case CACHE_INFO_L1_I :    *cacheinfo = LC1;    break;
  case CACHE_INFO_L1_D :    *cacheinfo = LD1;    break;
  case CACHE_INFO_L2 :      *cacheinfo = L2;     break;
  case CACHE_INFO_L3 :      *cacheinfo = L3;     break;
  case CACHE_INFO_L1_DTB :  *cacheinfo = DTB;    break;
  case CACHE_INFO_L1_ITB :  *cacheinfo = ITB;    break;
  case CACHE_INFO_L1_LDTB : *cacheinfo = LDTB;   break;
  case CACHE_INFO_L1_LITB : *cacheinfo = LITB;   break;
  case CACHE_INFO_L2_DTB :  *cacheinfo = L2DTB;  break;
  case CACHE_INFO_L2_ITB :  *cacheinfo = L2ITB;  break;
  case CACHE_INFO_L2_LDTB : *cacheinfo = L2LDTB; break;
  case CACHE_INFO_L2_LITB : *cacheinfo = L2LITB; break;
  default :
    break;
  }
  return 0;
}
1191
get_cpuname(void)1192 int get_cpuname(void){
1193
1194 int family, exfamily, model, vendor, exmodel, stepping;
1195
1196 if (!have_cpuid()) return CPUTYPE_80386;
1197
1198 family = get_cputype(GET_FAMILY);
1199 exfamily = get_cputype(GET_EXFAMILY);
1200 model = get_cputype(GET_MODEL);
1201 exmodel = get_cputype(GET_EXMODEL);
1202 stepping = get_cputype(GET_STEPPING);
1203
1204 vendor = get_vendor();
1205
1206 if (vendor == VENDOR_INTEL){
1207 switch (family) {
1208 case 0x4:
1209 return CPUTYPE_80486;
1210 case 0x5:
1211 return CPUTYPE_PENTIUM;
1212 case 0x6:
1213 switch (exmodel) {
1214 case 0:
1215 switch (model) {
1216 case 1:
1217 case 3:
1218 case 5:
1219 case 6:
1220 #if defined(__x86_64__) || defined(__amd64__)
1221 return CPUTYPE_CORE2;
1222 #else
1223 return CPUTYPE_PENTIUM2;
1224 #endif
1225 case 7:
1226 case 8:
1227 case 10:
1228 case 11:
1229 return CPUTYPE_PENTIUM3;
1230 case 9:
1231 case 13:
1232 case 14:
1233 return CPUTYPE_PENTIUMM;
1234 case 15:
1235 return CPUTYPE_CORE2;
1236 }
1237 break;
1238 case 1: // family 6 exmodel 1
1239 switch (model) {
1240 case 6:
1241 return CPUTYPE_CORE2;
1242 case 7:
1243 return CPUTYPE_PENRYN;
1244 case 10:
1245 case 11:
1246 case 14:
1247 case 15:
1248 return CPUTYPE_NEHALEM;
1249 case 12:
1250 return CPUTYPE_ATOM;
1251 case 13:
1252 return CPUTYPE_DUNNINGTON;
1253 }
1254 break;
1255 case 2: // family 6 exmodel 2
1256 switch (model) {
1257 case 5:
1258 //Intel Core (Clarkdale) / Core (Arrandale)
1259 // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
1260 // Xeon (Clarkdale), 32nm
1261 return CPUTYPE_NEHALEM;
1262 case 10:
1263 //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
1264 if(support_avx())
1265 return CPUTYPE_SANDYBRIDGE;
1266 else
1267 return CPUTYPE_NEHALEM; //OS doesn't support AVX
1268 case 12:
1269 //Xeon Processor 5600 (Westmere-EP)
1270 return CPUTYPE_NEHALEM;
1271 case 13:
1272 //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
1273 if(support_avx())
1274 return CPUTYPE_SANDYBRIDGE;
1275 else
1276 return CPUTYPE_NEHALEM;
1277 case 14:
1278 // Xeon E7540
1279 case 15:
1280 //Xeon Processor E7 (Westmere-EX)
1281 return CPUTYPE_NEHALEM;
1282 }
1283 break;
1284 case 3: // family 6 exmodel 3
1285 switch (model) {
1286 case 7:
1287 // Bay Trail
1288 return CPUTYPE_ATOM;
1289 case 10:
1290 case 14:
1291 // Ivy Bridge
1292 if(support_avx())
1293 return CPUTYPE_SANDYBRIDGE;
1294 else
1295 return CPUTYPE_NEHALEM;
1296 case 12:
1297 case 15:
1298 if(support_avx2())
1299 return CPUTYPE_HASWELL;
1300 if(support_avx())
1301 return CPUTYPE_SANDYBRIDGE;
1302 else
1303 return CPUTYPE_NEHALEM;
1304 case 13:
1305 //Broadwell
1306 if(support_avx2())
1307 return CPUTYPE_HASWELL;
1308 if(support_avx())
1309 return CPUTYPE_SANDYBRIDGE;
1310 else
1311 return CPUTYPE_NEHALEM;
1312 }
1313 break;
1314 case 4: // family 6 exmodel 4
1315 switch (model) {
1316 case 5:
1317 case 6:
1318 if(support_avx2())
1319 return CPUTYPE_HASWELL;
1320 if(support_avx())
1321 return CPUTYPE_SANDYBRIDGE;
1322 else
1323 return CPUTYPE_NEHALEM;
1324 case 7:
1325 case 15:
1326 //Broadwell
1327 if(support_avx2())
1328 return CPUTYPE_HASWELL;
1329 if(support_avx())
1330 return CPUTYPE_SANDYBRIDGE;
1331 else
1332 return CPUTYPE_NEHALEM;
1333 case 14:
1334 //Skylake
1335 if(support_avx2())
1336 return CPUTYPE_HASWELL;
1337 if(support_avx())
1338 return CPUTYPE_SANDYBRIDGE;
1339 else
1340 return CPUTYPE_NEHALEM;
1341 case 12:
1342 // Braswell
1343 case 13:
1344 // Avoton
1345 return CPUTYPE_NEHALEM;
1346 }
1347 break;
1348 case 5: // family 6 exmodel 5
1349 switch (model) {
1350 case 6:
1351 //Broadwell
1352 if(support_avx2())
1353 return CPUTYPE_HASWELL;
1354 if(support_avx())
1355 return CPUTYPE_SANDYBRIDGE;
1356 else
1357 return CPUTYPE_NEHALEM;
1358 case 5:
1359 // Skylake X
1360 if(support_avx512_bf16())
1361 return CPUTYPE_COOPERLAKE;
1362 if(support_avx512())
1363 return CPUTYPE_SKYLAKEX;
1364 if(support_avx2())
1365 return CPUTYPE_HASWELL;
1366 if(support_avx())
1367 return CPUTYPE_SANDYBRIDGE;
1368 else
1369 return CPUTYPE_NEHALEM;
1370 case 14:
1371 // Skylake
1372 if(support_avx2())
1373 return CPUTYPE_HASWELL;
1374 if(support_avx())
1375 return CPUTYPE_SANDYBRIDGE;
1376 else
1377 return CPUTYPE_NEHALEM;
1378 case 7:
1379 // Xeon Phi Knights Landing
1380 if(support_avx2())
1381 return CPUTYPE_HASWELL;
1382 if(support_avx())
1383 return CPUTYPE_SANDYBRIDGE;
1384 else
1385 return CPUTYPE_NEHALEM;
1386 case 12:
1387 // Apollo Lake
1388 case 15:
1389 // Denverton
1390 return CPUTYPE_NEHALEM;
1391 }
1392 break;
1393 case 6: // family 6 exmodel 6
1394 switch (model) {
1395 case 6: // Cannon Lake
1396 if(support_avx512())
1397 return CPUTYPE_SKYLAKEX;
1398 if(support_avx2())
1399 return CPUTYPE_HASWELL;
1400 if(support_avx())
1401 return CPUTYPE_SANDYBRIDGE;
1402 else
1403 return CPUTYPE_NEHALEM;
1404 case 10: // Ice Lake SP
1405 if(support_avx512_bf16())
1406 return CPUTYPE_COOPERLAKE;
1407 if(support_avx512())
1408 return CPUTYPE_SKYLAKEX;
1409 if(support_avx2())
1410 return CPUTYPE_HASWELL;
1411 if(support_avx())
1412 return CPUTYPE_SANDYBRIDGE;
1413 else
1414 return CPUTYPE_NEHALEM;
1415 }
1416 break;
1417 case 7: // family 6 exmodel 7
1418 switch (model) {
1419 case 10: // Goldmont Plus
1420 return CPUTYPE_NEHALEM;
1421 case 14: // Ice Lake
1422 if(support_avx512())
1423 return CPUTYPE_SKYLAKEX;
1424 if(support_avx2())
1425 return CPUTYPE_HASWELL;
1426 if(support_avx())
1427 return CPUTYPE_SANDYBRIDGE;
1428 else
1429 return CPUTYPE_NEHALEM;
1430 }
1431 break;
1432 case 9:
1433 case 8:
1434 switch (model) {
1435 case 12: // Tiger Lake
1436 if(support_avx512())
1437 return CPUTYPE_SKYLAKEX;
1438 if(support_avx2())
1439 return CPUTYPE_HASWELL;
1440 if(support_avx())
1441 return CPUTYPE_SANDYBRIDGE;
1442 else
1443 return CPUTYPE_NEHALEM;
1444 case 14: // Kaby Lake and refreshes
1445 if(support_avx2())
1446 return CPUTYPE_HASWELL;
1447 if(support_avx())
1448 return CPUTYPE_SANDYBRIDGE;
1449 else
1450 return CPUTYPE_NEHALEM;
1451 }
1452 case 10: //family 6 exmodel 10
1453 switch (model) {
1454 case 5: // Comet Lake H and S
1455 case 6: // Comet Lake U
1456 if(support_avx2())
1457 return CPUTYPE_HASWELL;
1458 if(support_avx())
1459 return CPUTYPE_SANDYBRIDGE;
1460 else
1461 return CPUTYPE_NEHALEM;
1462 case 7: // Rocket Lake
1463 if(support_avx512())
1464 return CPUTYPE_SKYLAKEX;
1465 if(support_avx2())
1466 return CPUTYPE_HASWELL;
1467 if(support_avx())
1468 return CPUTYPE_SANDYBRIDGE;
1469 else
1470 return CPUTYPE_NEHALEM;
1471 }
1472 break;
1473 }
1474 break;
1475 case 0x7:
1476 return CPUTYPE_ITANIUM;
1477 case 0xf:
1478 switch (exfamily) {
1479 case 0 :
1480 return CPUTYPE_PENTIUM4;
1481 case 1 :
1482 return CPUTYPE_ITANIUM;
1483 }
1484 break;
1485 }
1486 return CPUTYPE_INTEL_UNKNOWN;
1487 }
1488
1489 if (vendor == VENDOR_AMD){
1490 switch (family) {
1491 case 0x4:
1492 return CPUTYPE_AMD5X86;
1493 case 0x5:
1494 return CPUTYPE_AMDK6;
1495 case 0x6:
1496 #if defined(__x86_64__) || defined(__amd64__)
1497 return CPUTYPE_BARCELONA;
1498 #else
1499 return CPUTYPE_ATHLON;
1500 #endif
1501 case 0xf:
1502 switch (exfamily) {
1503 case 0:
1504 case 2:
1505 return CPUTYPE_OPTERON;
1506 case 1:
1507 case 3:
1508 // case 7:
1509 // case 10:
1510 return CPUTYPE_BARCELONA;
1511 case 5:
1512 case 7:
1513 return CPUTYPE_BOBCAT;
1514 case 6:
1515 switch (model) {
1516 case 1:
1517 //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
1518 if(support_avx())
1519 return CPUTYPE_BULLDOZER;
1520 else
1521 return CPUTYPE_BARCELONA; //OS don't support AVX.
1522 case 2: //AMD Piledriver
1523 case 3: //AMD Richland
1524 if(support_avx())
1525 return CPUTYPE_PILEDRIVER;
1526 else
1527 return CPUTYPE_BARCELONA; //OS don't support AVX.
1528 case 5: // New EXCAVATOR CPUS
1529 if(support_avx())
1530 return CPUTYPE_EXCAVATOR;
1531 else
1532 return CPUTYPE_BARCELONA; //OS don't support AVX.
1533 case 0:
1534 case 8:
1535 switch(exmodel){
1536 case 1: //AMD Trinity
1537 if(support_avx())
1538 return CPUTYPE_PILEDRIVER;
1539 else
1540 return CPUTYPE_BARCELONA; //OS don't support AVX.
1541 case 3:
1542 if(support_avx())
1543 return CPUTYPE_STEAMROLLER;
1544 else
1545 return CPUTYPE_BARCELONA; //OS don't support AVX.
1546
1547 case 6:
1548 if(support_avx())
1549 return CPUTYPE_EXCAVATOR;
1550 else
1551 return CPUTYPE_BARCELONA; //OS don't support AVX.
1552 }
1553 break;
1554 }
1555 break;
1556 case 8:
1557 switch (model) {
1558 case 1:
1559 // AMD Ryzen
1560 case 8:
1561 // AMD Ryzen2
1562 default:
1563 // Matisse/Renoir and other recent Ryzen2
1564 if(support_avx())
1565 #ifndef NO_AVX2
1566 return CPUTYPE_ZEN;
1567 #else
1568 return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
1569 #endif
1570 else
1571 return CPUTYPE_BARCELONA;
1572 }
1573 break;
1574 case 10: // Zen3
1575 if(support_avx())
1576 #ifndef NO_AVX2
1577 return CPUTYPE_ZEN;
1578 #else
1579 return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
1580 #endif
1581 else
1582 return CPUTYPE_BARCELONA;
1583 }
1584 break;
1585 }
1586 return CPUTYPE_AMD_UNKNOWN;
1587 }
1588
1589 if (vendor == VENDOR_HYGON){
1590 switch (family) {
1591 case 0xf:
1592 switch (exfamily) {
1593 case 9:
1594 //Hygon Dhyana
1595 if(support_avx())
1596 #ifndef NO_AVX2
1597 return CPUTYPE_ZEN;
1598 #else
1599 return CPUTYPE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
1600 #endif
1601 else
1602 return CPUTYPE_BARCELONA;
1603 }
1604 break;
1605 }
1606 return CPUTYPE_HYGON_UNKNOWN;
1607 }
1608
1609 if (vendor == VENDOR_CYRIX){
1610 switch (family) {
1611 case 0x4:
1612 return CPUTYPE_CYRIX5X86;
1613 case 0x5:
1614 return CPUTYPE_CYRIXM1;
1615 case 0x6:
1616 return CPUTYPE_CYRIXM2;
1617 }
1618 return CPUTYPE_CYRIX_UNKNOWN;
1619 }
1620
1621 if (vendor == VENDOR_NEXGEN){
1622 switch (family) {
1623 case 0x5:
1624 return CPUTYPE_NEXGENNX586;
1625 }
1626 return CPUTYPE_NEXGEN_UNKNOWN;
1627 }
1628
1629 if (vendor == VENDOR_CENTAUR){
1630 switch (family) {
1631 case 0x5:
1632 return CPUTYPE_CENTAURC6;
1633 case 0x6:
1634 if (model == 0xf && stepping < 0xe)
1635 return CPUTYPE_NANO;
1636 return CPUTYPE_NEHALEM;
1637 default:
1638 if (family >= 0x7)
1639 return CPUTYPE_NEHALEM;
1640 else
1641 return CPUTYPE_VIAC3;
1642 }
1643 }
1644
1645 if (vendor == VENDOR_ZHAOXIN){
1646 return CPUTYPE_NEHALEM;
1647 }
1648
1649 if (vendor == VENDOR_RISE){
1650 switch (family) {
1651 case 0x5:
1652 return CPUTYPE_RISEMP6;
1653 }
1654 return CPUTYPE_RISE_UNKNOWN;
1655 }
1656
1657 if (vendor == VENDOR_SIS){
1658 switch (family) {
1659 case 0x5:
1660 return CPUTYPE_SYS55X;
1661 }
1662 return CPUTYPE_SIS_UNKNOWN;
1663 }
1664
1665 if (vendor == VENDOR_TRANSMETA){
1666 switch (family) {
1667 case 0x5:
1668 return CPUTYPE_CRUSOETM3X;
1669 }
1670 return CPUTYPE_TRANSMETA_UNKNOWN;
1671 }
1672
1673 if (vendor == VENDOR_NSC){
1674 switch (family) {
1675 case 0x5:
1676 return CPUTYPE_NSGEODE;
1677 }
1678 return CPUTYPE_NSC_UNKNOWN;
1679 }
1680
1681 return CPUTYPE_UNKNOWN;
1682 }
1683
/*
 * Upper-case CPU names.  get_cpunamechar() and get_cpuconfig() index this
 * table with the value returned by get_cpuname(); the leading comment on
 * each row gives the index range of its entries.
 */
static char *cpuname[] = {
  /*  0- 3 */ "UNKNOWN", "INTEL_UNKNOWN", "UMC_UNKNOWN", "AMD_UNKNOWN",
  /*  4- 7 */ "CYRIX_UNKNOWN", "NEXGEN_UNKNOWN", "CENTAUR_UNKNOWN", "RISE_UNKNOWN",
  /*  8-10 */ "SIS_UNKNOWN", "TRANSMETA_UNKNOWN", "NSC_UNKNOWN",
  /* 11-17 */ "80386", "80486", "PENTIUM", "PENTIUM2", "PENTIUM3", "PENTIUMM", "PENTIUM4",
  /* 18-24 */ "CORE2", "PENRYN", "DUNNINGTON", "NEHALEM", "ATOM", "ITANIUM", "ITANIUM2",
  /* 25-32 */ "5X86", "K6", "ATHLON", "DURON", "OPTERON", "BARCELONA", "SHANGHAI", "ISTANBUL",
  /* 33-35 */ "CYRIX5X86", "CYRIXM1", "CYRIXM2",
  /* 36-41 */ "NEXGENNX586", "CENTAURC6", "RISEMP6", "SYS55X", "TM3X00", "NSGEODE",
  /* 42-44 */ "VIAC3", "NANO", "SANDYBRIDGE",
  /* 45-50 */ "BOBCAT", "BULLDOZER", "PILEDRIVER", "HASWELL", "STEAMROLLER", "EXCAVATOR",
  /* 51-54 */ "ZEN", "SKYLAKEX", "DHYANA", "COOPERLAKE"
};
1741
/*
 * Lower-case CPU names.  get_lower_cpunamechar() indexes this table with
 * the value returned by get_cpuname(), so it must stay entry-for-entry
 * parallel to cpuname[].  The "viac3" entry was previously missing, which
 * shifted every name from index 42 upward by one slot and left the table
 * one element shorter than cpuname[].
 */
static char *lowercpuname[] = {
  /*  0- 3 */ "unknown", "intel_unknown", "umc_unknown", "amd_unknown",
  /*  4- 7 */ "cyrix_unknown", "nexgen_unknown", "centaur_unknown", "rise_unknown",
  /*  8-10 */ "sis_unknown", "transmeta_unknown", "nsc_unknown",
  /* 11-17 */ "80386", "80486", "pentium", "pentium2", "pentium3", "pentiumm", "pentium4",
  /* 18-24 */ "core2", "penryn", "dunnington", "nehalem", "atom", "itanium", "itanium2",
  /* 25-32 */ "5x86", "k6", "athlon", "duron", "opteron", "barcelona", "shanghai", "istanbul",
  /* 33-35 */ "cyrix5x86", "cyrixm1", "cyrixm2",
  /* 36-41 */ "nexgennx586", "centaurc6", "risemp6", "sys55x", "tms3x00", "nsgeode",
  /* 42-44 */ "viac3", "nano", "sandybridge",
  /* 45-50 */ "bobcat", "bulldozer", "piledriver", "haswell", "steamroller", "excavator",
  /* 51-54 */ "zen", "skylakex", "dhyana", "cooperlake"
};
1798
/*
 * Upper-case core names.  get_corename() and get_cpuconfig() index this
 * table with the value returned by get_coretype(); the leading comment on
 * each row gives the index range of its entries.
 */
static char *corename[] = {
  /*  0- 3 */ "UNKNOWN", "80486", "P5", "P6",
  /*  4- 8 */ "KATMAI", "COPPERMINE", "NORTHWOOD", "PRESCOTT", "BANIAS",
  /*  9-12 */ "ATHLON", "OPTERON", "BARCELONA", "VIAC3",
  /* 13-16 */ "YONAH", "CORE2", "PENRYN", "DUNNINGTON",
  /* 17-20 */ "NEHALEM", "ATOM", "NANO", "SANDYBRIDGE",
  /* 21-23 */ "BOBCAT", "BULLDOZER", "PILEDRIVER",
  /* 24-26 */ "HASWELL", "STEAMROLLER", "EXCAVATOR",
  /* 27-30 */ "ZEN", "SKYLAKEX", "DHYANA", "COOPERLAKE"
};
1832
/*
 * Lower-case core names.  get_libname() indexes this table with the value
 * returned by get_coretype(); entries are in the same order as corename[].
 */
static char *corename_lower[] = {
  /*  0- 3 */ "unknown", "80486", "p5", "p6",
  /*  4- 8 */ "katmai", "coppermine", "northwood", "prescott", "banias",
  /*  9-12 */ "athlon", "opteron", "barcelona", "viac3",
  /* 13-16 */ "yonah", "core2", "penryn", "dunnington",
  /* 17-20 */ "nehalem", "atom", "nano", "sandybridge",
  /* 21-23 */ "bobcat", "bulldozer", "piledriver",
  /* 24-26 */ "haswell", "steamroller", "excavator",
  /* 27-30 */ "zen", "skylakex", "dhyana", "cooperlake"
};
1866
1867
get_cpunamechar(void)1868 char *get_cpunamechar(void){
1869 return cpuname[get_cpuname()];
1870 }
1871
get_lower_cpunamechar(void)1872 char *get_lower_cpunamechar(void){
1873 return lowercpuname[get_cpuname()];
1874 }
1875
1876
get_coretype(void)1877 int get_coretype(void){
1878
1879 int family, exfamily, model, exmodel, vendor, stepping;
1880
1881 if (!have_cpuid()) return CORE_80486;
1882
1883 family = get_cputype(GET_FAMILY);
1884 exfamily = get_cputype(GET_EXFAMILY);
1885 model = get_cputype(GET_MODEL);
1886 exmodel = get_cputype(GET_EXMODEL);
1887 stepping = get_cputype(GET_STEPPING);
1888
1889 vendor = get_vendor();
1890
1891 if (vendor == VENDOR_INTEL){
1892 switch (family) {
1893 case 4:
1894 return CORE_80486;
1895 case 5:
1896 return CORE_P5;
1897 case 6:
1898 switch (exmodel) {
1899 case 0:
1900 switch (model) {
1901 case 0:
1902 case 1:
1903 case 2:
1904 case 3:
1905 case 4:
1906 case 5:
1907 case 6:
1908 #if defined(__x86_64__) || defined(__amd64__)
1909 return CORE_CORE2;
1910 #else
1911 return CORE_P6;
1912 #endif
1913 case 7:
1914 return CORE_KATMAI;
1915 case 8:
1916 case 10:
1917 case 11:
1918 return CORE_COPPERMINE;
1919 case 9:
1920 case 13:
1921 case 14:
1922 return CORE_BANIAS;
1923 case 15:
1924 return CORE_CORE2;
1925 }
1926 break;
1927 case 1:
1928 switch (model) {
1929 case 6:
1930 return CORE_CORE2;
1931 case 7:
1932 return CORE_PENRYN;
1933 case 10:
1934 case 11:
1935 case 14:
1936 case 15:
1937 return CORE_NEHALEM;
1938 case 12:
1939 return CORE_ATOM;
1940 case 13:
1941 return CORE_DUNNINGTON;
1942 }
1943 break;
1944 case 2:
1945 switch (model) {
1946 case 5:
1947 //Intel Core (Clarkdale) / Core (Arrandale)
1948 // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
1949 // Xeon (Clarkdale), 32nm
1950 return CORE_NEHALEM;
1951 case 10:
1952 //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
1953 if(support_avx())
1954 return CORE_SANDYBRIDGE;
1955 else
1956 return CORE_NEHALEM; //OS doesn't support AVX
1957 case 12:
1958 //Xeon Processor 5600 (Westmere-EP)
1959 return CORE_NEHALEM;
1960 case 13:
1961 //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
1962 if(support_avx())
1963 return CORE_SANDYBRIDGE;
1964 else
1965 return CORE_NEHALEM; //OS doesn't support AVX
1966 case 14:
1967 //Xeon E7540
1968 case 15:
1969 //Xeon Processor E7 (Westmere-EX)
1970 return CORE_NEHALEM;
1971 }
1972 break;
1973 case 3:
1974 switch (model) {
1975 case 7:
1976 return CORE_ATOM;
1977 case 10:
1978 case 14:
1979 if(support_avx())
1980 return CORE_SANDYBRIDGE;
1981 else
1982 return CORE_NEHALEM; //OS doesn't support AVX
1983 case 12:
1984 case 15:
1985 if(support_avx())
1986 #ifndef NO_AVX2
1987 return CORE_HASWELL;
1988 #else
1989 return CORE_SANDYBRIDGE;
1990 #endif
1991 else
1992 return CORE_NEHALEM;
1993 case 13:
1994 //broadwell
1995 if(support_avx())
1996 #ifndef NO_AVX2
1997 return CORE_HASWELL;
1998 #else
1999 return CORE_SANDYBRIDGE;
2000 #endif
2001 else
2002 return CORE_NEHALEM;
2003 }
2004 break;
2005 case 4:
2006 switch (model) {
2007 case 5:
2008 case 6:
2009 if(support_avx())
2010 #ifndef NO_AVX2
2011 return CORE_HASWELL;
2012 #else
2013 return CORE_SANDYBRIDGE;
2014 #endif
2015 else
2016 return CORE_NEHALEM;
2017 case 7:
2018 case 15:
2019 //broadwell
2020 if(support_avx())
2021 #ifndef NO_AVX2
2022 return CORE_HASWELL;
2023 #else
2024 return CORE_SANDYBRIDGE;
2025 #endif
2026 else
2027 return CORE_NEHALEM;
2028 case 14:
2029 //Skylake
2030 if(support_avx())
2031 #ifndef NO_AVX2
2032 return CORE_HASWELL;
2033 #else
2034 return CORE_SANDYBRIDGE;
2035 #endif
2036 else
2037 return CORE_NEHALEM;
2038 case 12:
2039 // Braswell
2040 case 13:
2041 // Avoton
2042 return CORE_NEHALEM;
2043 }
2044 break;
2045 case 10:
2046 switch (model) {
2047 case 5: // Comet Lake H and S
2048 case 6: // Comet Lake U
2049 if(support_avx())
2050 #ifndef NO_AVX2
2051 return CORE_HASWELL;
2052 #else
2053 return CORE_SANDYBRIDGE;
2054 #endif
2055 else
2056 return CORE_NEHALEM;
2057 case 7:// Rocket Lake
2058 #ifndef NO_AVX512
2059 if(support_avx512())
2060 return CORE_SKYLAKEX;
2061 #endif
2062 #ifndef NO_AVX2
2063 if(support_avx2())
2064 return CORE_HASWELL;
2065 #endif
2066 if(support_avx())
2067 return CORE_SANDYBRIDGE;
2068 else
2069 return CORE_NEHALEM;
2070 }
2071 case 5:
2072 switch (model) {
2073 case 6:
2074 //broadwell
2075 if(support_avx())
2076 #ifndef NO_AVX2
2077 return CORE_HASWELL;
2078 #else
2079 return CORE_SANDYBRIDGE;
2080 #endif
2081 else
2082 return CORE_NEHALEM;
2083 case 5:
2084 // Skylake X
2085 #ifndef NO_AVX512
2086 if(support_avx512_bf16())
2087 return CORE_COOPERLAKE;
2088 return CORE_SKYLAKEX;
2089 #else
2090 if(support_avx())
2091 #ifndef NO_AVX2
2092 return CORE_HASWELL;
2093 #else
2094 return CORE_SANDYBRIDGE;
2095 #endif
2096 else
2097 return CORE_NEHALEM;
2098 #endif
2099 case 14:
2100 // Skylake
2101 if(support_avx())
2102 #ifndef NO_AVX2
2103 return CORE_HASWELL;
2104 #else
2105 return CORE_SANDYBRIDGE;
2106 #endif
2107 else
2108 return CORE_NEHALEM;
2109 case 7:
2110 // Phi Knights Landing
2111 if(support_avx())
2112 #ifndef NO_AVX2
2113 return CORE_HASWELL;
2114 #else
2115 return CORE_SANDYBRIDGE;
2116 #endif
2117 else
2118 return CORE_NEHALEM;
2119 case 12:
2120 // Apollo Lake
2121 return CORE_NEHALEM;
2122 }
2123 break;
2124 case 6:
2125 if (model == 6)
2126 #ifndef NO_AVX512
2127 return CORE_SKYLAKEX;
2128 #else
2129 if(support_avx())
2130 #ifndef NO_AVX2
2131 return CORE_HASWELL;
2132 #else
2133 return CORE_SANDYBRIDGE;
2134 #endif
2135 else
2136 return CORE_NEHALEM;
2137 #endif
2138 if (model == 10)
2139 #ifndef NO_AVX512
2140 if(support_avx512_bf16())
2141 return CORE_COOPERLAKE;
2142 return CORE_SKYLAKEX;
2143 #else
2144 if(support_avx())
2145 #ifndef NO_AVX2
2146 return CORE_HASWELL;
2147 #else
2148 return CORE_SANDYBRIDGE;
2149 #endif
2150 else
2151 return CORE_NEHALEM;
2152 #endif
2153 break;
2154 case 7:
2155 if (model == 10)
2156 return CORE_NEHALEM;
2157 if (model == 14)
2158 #ifndef NO_AVX512
2159 return CORE_SKYLAKEX;
2160 #else
2161 if(support_avx())
2162 #ifndef NO_AVX2
2163 return CORE_HASWELL;
2164 #else
2165 return CORE_SANDYBRIDGE;
2166 #endif
2167 else
2168 return CORE_NEHALEM;
2169 #endif
2170 break;
2171 case 9:
2172 case 8:
2173 if (model == 12) { // Tiger Lake
2174 if(support_avx512())
2175 return CORE_SKYLAKEX;
2176 if(support_avx2())
2177 return CORE_HASWELL;
2178 if(support_avx())
2179 return CORE_SANDYBRIDGE;
2180 else
2181 return CORE_NEHALEM;
2182 }
2183 if (model == 14) { // Kaby Lake
2184 if(support_avx())
2185 #ifndef NO_AVX2
2186 return CORE_HASWELL;
2187 #else
2188 return CORE_SANDYBRIDGE;
2189 #endif
2190 else
2191 return CORE_NEHALEM;
2192 }
2193 }
2194 break;
2195
2196 case 15:
2197 if (model <= 0x2) return CORE_NORTHWOOD;
2198 else return CORE_PRESCOTT;
2199 }
2200 }
2201
2202 if (vendor == VENDOR_AMD){
2203 if (family <= 0x5) return CORE_80486;
2204 #if defined(__x86_64__) || defined(__amd64__)
2205 if (family <= 0xe) return CORE_BARCELONA;
2206 #else
2207 if (family <= 0xe) return CORE_ATHLON;
2208 #endif
2209 if (family == 0xf){
2210 if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
2211 else if (exfamily == 5) return CORE_BOBCAT;
2212 else if (exfamily == 6) {
2213 switch (model) {
2214 case 1:
2215 //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
2216 if(support_avx())
2217 return CORE_BULLDOZER;
2218 else
2219 return CORE_BARCELONA; //OS don't support AVX.
2220 case 2: //AMD Piledriver
2221 case 3: //AMD Richland
2222 if(support_avx())
2223 return CORE_PILEDRIVER;
2224 else
2225 return CORE_BARCELONA; //OS don't support AVX.
2226 case 5: // New EXCAVATOR
2227 if(support_avx())
2228 return CORE_EXCAVATOR;
2229 else
2230 return CORE_BARCELONA; //OS don't support AVX.
2231 case 0:
2232 case 8:
2233 switch(exmodel){
2234 case 1: //AMD Trinity
2235 if(support_avx())
2236 return CORE_PILEDRIVER;
2237 else
2238 return CORE_BARCELONA; //OS don't support AVX.
2239
2240 case 3:
2241 if(support_avx())
2242 return CORE_STEAMROLLER;
2243 else
2244 return CORE_BARCELONA; //OS don't support AVX.
2245
2246 case 6:
2247 if(support_avx())
2248 return CORE_EXCAVATOR;
2249 else
2250 return CORE_BARCELONA; //OS don't support AVX.
2251 }
2252 break;
2253 }
2254 } else if (exfamily == 8 || exfamily == 10) {
2255 switch (model) {
2256 case 1:
2257 // AMD Ryzen
2258 case 8:
2259 // Ryzen 2
2260 default:
2261 // Matisse,Renoir Ryzen2 models
2262 if(support_avx())
2263 #ifndef NO_AVX2
2264 return CORE_ZEN;
2265 #else
2266 return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
2267 #endif
2268 else
2269 return CORE_BARCELONA;
2270 }
2271 } else {
2272 return CORE_BARCELONA;
2273 }
2274 }
2275 }
2276
2277 if (vendor == VENDOR_HYGON){
2278 if (family == 0xf){
2279 if (exfamily == 9) {
2280 if(support_avx())
2281 #ifndef NO_AVX2
2282 return CORE_ZEN;
2283 #else
2284 return CORE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
2285 #endif
2286 else
2287 return CORE_BARCELONA;
2288 } else {
2289 return CORE_BARCELONA;
2290 }
2291 }
2292 }
2293
2294 if (vendor == VENDOR_CENTAUR) {
2295 switch (family) {
2296 case 0x6:
2297 if (model == 0xf && stepping < 0xe)
2298 return CORE_NANO;
2299 return CORE_NEHALEM;
2300 default:
2301 if (family >= 0x7)
2302 return CORE_NEHALEM;
2303 else
2304 return CORE_VIAC3;
2305 }
2306 }
2307
2308 if (vendor == VENDOR_ZHAOXIN) {
2309 return CORE_NEHALEM;
2310 }
2311
2312 return CORE_UNKNOWN;
2313 }
2314
get_cpuconfig(void)2315 void get_cpuconfig(void){
2316
2317 cache_info_t info;
2318 int features;
2319
2320 printf("#define %s\n", cpuname[get_cpuname()]);
2321
2322
2323 if (get_coretype() != CORE_P5) {
2324
2325 get_cacheinfo(CACHE_INFO_L1_I, &info);
2326 if (info.size > 0) {
2327 printf("#define L1_CODE_SIZE %d\n", info.size * 1024);
2328 printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
2329 printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
2330 }
2331
2332 get_cacheinfo(CACHE_INFO_L1_D, &info);
2333 if (info.size > 0) {
2334 printf("#define L1_DATA_SIZE %d\n", info.size * 1024);
2335 printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
2336 printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
2337 }
2338
2339 get_cacheinfo(CACHE_INFO_L2, &info);
2340 if (info.size > 0) {
2341 printf("#define L2_SIZE %d\n", info.size * 1024);
2342 printf("#define L2_ASSOCIATIVE %d\n", info.associative);
2343 printf("#define L2_LINESIZE %d\n", info.linesize);
2344 } else {
2345 //fall back for some virtual machines.
2346 printf("#define L2_SIZE 1048576\n");
2347 printf("#define L2_ASSOCIATIVE 6\n");
2348 printf("#define L2_LINESIZE 64\n");
2349 }
2350
2351
2352 get_cacheinfo(CACHE_INFO_L3, &info);
2353 if (info.size > 0) {
2354 printf("#define L3_SIZE %d\n", info.size * 1024);
2355 printf("#define L3_ASSOCIATIVE %d\n", info.associative);
2356 printf("#define L3_LINESIZE %d\n", info.linesize);
2357 }
2358
2359 get_cacheinfo(CACHE_INFO_L1_ITB, &info);
2360 if (info.size > 0) {
2361 printf("#define ITB_SIZE %d\n", info.size * 1024);
2362 printf("#define ITB_ASSOCIATIVE %d\n", info.associative);
2363 printf("#define ITB_ENTRIES %d\n", info.linesize);
2364 }
2365
2366 get_cacheinfo(CACHE_INFO_L1_DTB, &info);
2367 if (info.size > 0) {
2368 printf("#define DTB_SIZE %d\n", info.size * 1024);
2369 printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
2370 printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
2371 } else {
2372 //fall back for some virtual machines.
2373 printf("#define DTB_DEFAULT_ENTRIES 32\n");
2374 }
2375
2376 features = get_cputype(GET_FEATURE);
2377
2378 if (features & HAVE_CMOV ) printf("#define HAVE_CMOV\n");
2379 if (features & HAVE_MMX ) printf("#define HAVE_MMX\n");
2380 if (features & HAVE_SSE ) printf("#define HAVE_SSE\n");
2381 if (features & HAVE_SSE2 ) printf("#define HAVE_SSE2\n");
2382 if (features & HAVE_SSE3 ) printf("#define HAVE_SSE3\n");
2383 if (features & HAVE_SSSE3) printf("#define HAVE_SSSE3\n");
2384 if (features & HAVE_SSE4_1) printf("#define HAVE_SSE4_1\n");
2385 if (features & HAVE_SSE4_2) printf("#define HAVE_SSE4_2\n");
2386 if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
2387 if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
2388 if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
2389 if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
2390 if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
2391 if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n");
2392 if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
2393 if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
2394 if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
2395 if (features & HAVE_FMA3 ) printf("#define HAVE_FMA3\n");
2396 if (features & HAVE_CFLUSH) printf("#define HAVE_CFLUSH\n");
2397 if (features & HAVE_HIT) printf("#define HAVE_HIT 1\n");
2398 if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
2399 if (features & HAVE_128BITFPU) printf("#define HAVE_128BITFPU\n");
2400 if (features & HAVE_FASTMOVU) printf("#define HAVE_FASTMOVU\n");
2401
2402 printf("#define NUM_SHAREDCACHE %d\n", get_cputype(GET_NUMSHARE) + 1);
2403 printf("#define NUM_CORES %d\n", get_cputype(GET_NUMCORES) + 1);
2404
2405 features = get_coretype();
2406 if (features > 0) printf("#define CORE_%s\n", corename[features]);
2407 } else {
2408 printf("#define DTB_DEFAULT_ENTRIES 16\n");
2409 printf("#define L1_CODE_SIZE 8192\n");
2410 printf("#define L1_DATA_SIZE 8192\n");
2411 printf("#define L2_SIZE 0\n");
2412 }
2413 }
2414
/* Print the architecture name: "X86_64" when __64BIT__ is defined at build
 * time, "X86" otherwise.  No newline is written. */
void get_architecture(void){
#ifdef __64BIT__
  const char *arch = "X86_64";
#else
  const char *arch = "X86";
#endif

  printf("%s", arch);
}
2422
/* Print the upper-case CPU name returned by get_cpunamechar(), without a
 * trailing newline. */
void get_subarchitecture(void){
  char *name = get_cpunamechar();

  printf("%s", name);
}
2426
/* Print the sub-directory name: "x86_64" when __64BIT__ is defined at build
 * time, "x86" otherwise.  No newline is written. */
void get_subdirname(void){
#ifdef __64BIT__
  const char *dir = "x86_64";
#else
  const char *dir = "x86";
#endif

  printf("%s", dir);
}
2434
get_corename(void)2435 char *get_corename(void){
2436 return corename[get_coretype()];
2437 }
2438
get_libname(void)2439 void get_libname(void){
2440 printf("%s", corename_lower[get_coretype()]);
2441 }
2442
/* This is for the Makefile */
get_sse(void)2444 void get_sse(void){
2445
2446 int features;
2447
2448 features = get_cputype(GET_FEATURE);
2449
2450 if (features & HAVE_MMX ) printf("HAVE_MMX=1\n");
2451 if (features & HAVE_SSE ) printf("HAVE_SSE=1\n");
2452 if (features & HAVE_SSE2 ) printf("HAVE_SSE2=1\n");
2453 if (features & HAVE_SSE3 ) printf("HAVE_SSE3=1\n");
2454 if (features & HAVE_SSSE3) printf("HAVE_SSSE3=1\n");
2455 if (features & HAVE_SSE4_1) printf("HAVE_SSE4_1=1\n");
2456 if (features & HAVE_SSE4_2) printf("HAVE_SSE4_2=1\n");
2457 if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
2458 if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
2459 if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
2460 if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
2461 if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
2462 if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n");
2463 if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
2464 if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
2465 if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
2466 if (features & HAVE_FMA3 ) printf("HAVE_FMA3=1\n");
2467
2468 }
2469