1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include "cpuid.h"
42 
/* C_INLINE: spelling of the inline keyword used by the helpers in this file.
 * MSVC (when not driven by clang) gets __inline; every other compiler gets
 * the standard inline keyword. */
#if defined(_MSC_VER) && !defined(__clang__)
#define C_INLINE __inline
#else
#define C_INLINE inline
#endif
48 
49 /*
50 #ifdef NO_AVX
51 #define CPUTYPE_HASWELL CPUTYPE_NEHALEM
52 #define CORE_HASWELL CORE_NEHALEM
53 #define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
54 #define CORE_SKYLAKEX CORE_NEHALEM
55 #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
56 #define CORE_SANDYBRIDGE CORE_NEHALEM
57 #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
58 #define CORE_BULLDOZER CORE_BARCELONA
59 #define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
60 #define CORE_PILEDRIVER CORE_BARCELONA
61 #endif
62 */
63 
64 #if defined(_MSC_VER) && !defined(__clang__)
65 
/*
 * MSVC build: run CPUID for leaf `op` through the __cpuid intrinsic and hand
 * the four result words back through eax, ebx, ecx and edx (written in that
 * order).
 */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
  int regs[4] = {-1};
  int *dest[4];
  int idx;

  dest[0] = eax;
  dest[1] = ebx;
  dest[2] = ecx;
  dest[3] = edx;

  __cpuid(regs, op);

  /* regs[] holds the results in EAX, EBX, ECX, EDX order. */
  for (idx = 0; idx < 4; idx++) {
    *dest[idx] = regs[idx];
  }
}
75 
/*
 * MSVC build: run CPUID for leaf `op`, sub-leaf `count`, through the
 * __cpuidex intrinsic and hand the four result words back through eax, ebx,
 * ecx and edx (written in that order).
 */
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
  int regs[4] = {-1};
  int *dest[4];
  int idx;

  dest[0] = eax;
  dest[1] = ebx;
  dest[2] = ecx;
  dest[3] = edx;

  __cpuidex(regs, op, count);

  /* regs[] holds the results in EAX, EBX, ECX, EDX order. */
  for (idx = 0; idx < 4; idx++) {
    *dest[idx] = regs[idx];
  }
}
85 
86 #else
87 
88 #ifndef CPUIDEMU
89 
90 #if defined(__APPLE__) && defined(__i386__)
/* 32-bit Apple targets: cpuid/cpuid_count are only declared here; no
 * definition is visible in this part of the file.
 * NOTE(review): presumably implemented in a separate translation unit - confirm. */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
93 #else
/*
 * Execute the CPUID instruction with EAX = op and ECX = 0, and store the
 * resulting EAX/EBX/ECX/EDX values through the four output pointers.
 */
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
  /* 32-bit PIC build: EBX is saved in EDI before CPUID and swapped back
   * afterwards, so the EBX result is delivered through EDI ("=D") and EBX
   * itself is left as it was.
   * NOTE(review): presumably because EBX is reserved as the PIC base
   * register on this target - confirm. */
  __asm__ __volatile__
    ("mov %%ebx, %%edi;"
     "cpuid;"
     "xchgl %%ebx, %%edi;"
     : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
#else
  /* Plain form: all four registers are read directly as outputs. */
  __asm__ __volatile__
    ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
#endif
}
106 
/*
 * Execute the CPUID instruction with EAX = op and ECX = count (the sub-leaf),
 * and store the resulting EAX/EBX/ECX/EDX values through the four output
 * pointers.  The inputs use matching constraints ("0" and "2"), i.e. they
 * share registers with the eax and ecx outputs.
 */
static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
  /* 32-bit PIC build: EBX is saved in EDI before CPUID and swapped back
   * afterwards, so the EBX result is delivered through EDI ("=D") and EBX
   * itself is left as it was.
   * NOTE(review): presumably because EBX is reserved as the PIC base
   * register on this target - confirm. */
  __asm__ __volatile__
    ("mov %%ebx, %%edi;"
     "cpuid;"
     "xchgl %%ebx, %%edi;"
     : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#else
  /* Plain form: all four registers are read directly as outputs. */
  __asm__ __volatile__
    ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#endif
}
119 #endif
120 
121 #else
122 
/*
 * One canned CPUID result used by the emulated cpuid(): `id` is matched
 * against the requested leaf, and a/b/c/d are the values reported back as
 * EAX/EBX/ECX/EDX.
 */
typedef struct {
  unsigned int id;  /* leaf number this entry answers */
  unsigned int a;   /* value returned as EAX */
  unsigned int b;   /* value returned as EBX */
  unsigned int c;   /* value returned as ECX */
  unsigned int d;   /* value returned as EDX */
} idlist_t;
126 
/*
 * Describes one emulated processor: two strings plus the inclusive index
 * range [start, stop] of its entries in idlist[].
 */
typedef struct {
  char *vendor;  /* NOTE(review): presumably the CPUID vendor string - not read in the visible code */
  char *name;    /* NOTE(review): presumably a human-readable CPU name - not read in the visible code */
  int start;     /* first idlist[] index belonging to this processor */
  int stop;      /* last idlist[] index belonging to this processor (inclusive) */
} vendor_t;
132 
/* Emulation tables; their definitions are not in this part of the file. */
extern idlist_t idlist[];
extern vendor_t vendor[];

/* Index into vendor[] selecting whose idlist[] range the emulated cpuid()
 * searches; initialised from the VENDOR macro. */
static int cv = VENDOR;
137 
cpuid(unsigned int op,unsigned int * eax,unsigned int * ebx,unsigned int * ecx,unsigned int * edx)138 void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx){
139 
140   static int current = 0;
141 
142   int start = vendor[cv].start;
143   int stop  = vendor[cv].stop;
144   int count = stop - start;
145 
146   if ((current < start) || (current > stop)) current = start;
147 
148   while ((count > 0) && (idlist[current].id != op)) {
149 
150     current ++;
151     if (current > stop) current = start;
152     count --;
153 
154   }
155 
156   *eax = idlist[current].a;
157   *ebx = idlist[current].b;
158   *ecx = idlist[current].c;
159   *edx = idlist[current].d;
160 }
161 
/*
 * Emulated sub-leaf query.  The emulation table is looked up by leaf only,
 * so `count` is ignored and the call is forwarded to cpuid().
 *
 * The original wrote `return cpuid(...)`; a return statement with an
 * expression is not permitted in a function returning void, so the call is
 * now a plain statement.
 */
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
  (void)count;  /* sub-leaf is not used by the emulation */
  cpuid (op, eax, ebx, ecx, edx);
}
165 
166 #endif
167 
168 #endif // _MSC_VER
169 
have_cpuid(void)170 static C_INLINE int have_cpuid(void){
171   int eax, ebx, ecx, edx;
172 
173   cpuid(0, &eax, &ebx, &ecx, &edx);
174   return eax;
175 }
176 
have_excpuid(void)177 static C_INLINE int have_excpuid(void){
178   int eax, ebx, ecx, edx;
179 
180   cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
181   return eax & 0xffff;
182 }
183 
184 #ifndef NO_AVX
/*
 * Execute XGETBV with ECX = op and return the result through eax/edx.
 *
 * On the inline-asm path the EAX and EDX outputs are stored in *eax and
 * *edx respectively.
 * NOTE(review): on the MSVC path only *eax is assigned; *edx is left
 * untouched.  The callers visible in this file read only eax afterwards.
 */
static C_INLINE void xgetbv(int op, int * eax, int * edx){
  //Use binary code for xgetbv
#if defined(_MSC_VER) && !defined(__clang__)
  *eax = __xgetbv(op);
#else
  /* 0x0f 0x01 0xd0 is the raw encoding of the XGETBV instruction.
   * NOTE(review): presumably emitted as bytes so that assemblers without the
   * mnemonic can still build this file - confirm. */
  __asm__ __volatile__
    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
#endif
}
194 #endif
195 
/*
 * Report whether AVX can be used: returns 1 when CPUID leaf 1 sets ECX bits
 * 28, 27 and 26 (AVX, OSXSAVE and XSAVE in the CPUID specification) and
 * XGETBV(0) shows bits 1 and 2 set, i.e. the OS saves the xmm and ymm
 * registers.  Returns 0 otherwise, and always 0 when built with NO_AVX.
 */
int support_avx(){
#ifndef NO_AVX
  const int required = (1 << 28) | (1 << 27) | (1 << 26);
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  if ((ecx & required) != required)
    return 0;

  /* 6 = (1<<1) | (1<<2): xmm and ymm state enabled by the OS. */
  xgetbv(0, &eax, &edx);
  return ((eax & 6) == 6) ? 1 : 0;
#else
  return 0;
#endif
}
213 
/*
 * Report whether AVX2 can be used: requires support_avx() and EBX bit 5 of
 * CPUID leaf 7.  Returns 1 or 0; always 0 when built with NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* EBX bit 5: CPU supports AVX2 */
  return ((ebx & (1<<5)) != 0) ? 1 : 0;
#else
  return 0;
#endif
}
229 
/*
 * Report whether AVX-512 can be used.  Returns 1 only when all of the
 * following hold, otherwise 0:
 *   - support_avx() succeeds,
 *   - CPUID leaf 7 EBX bit 5 (AVX2) is set,
 *   - CPUID leaf 7 EBX bit 31 (AVX512VL) is set,
 *   - XGETBV(0) has all bits of 0xe0 set (the OS saves the zmm registers).
 * Always 0 when built with NO_AVX or NO_AVX512.
 *
 * Changes from the previous version: a missing AVX2 flag now really yields 0
 * (it used to assign ret=0 and carry on), and bit 31 is tested with an
 * unsigned mask because 1<<31 overflows a signed int.
 */
int support_avx512(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  if ((ebx & (1 << 5)) == 0)
    return 0;  /* cpu does not have avx2 flag */

  if (((unsigned int)ebx & (1u << 31)) == 0)
    return 0;  /* no AVX512VL flag */

  xgetbv(0, &eax, &edx);
  return ((eax & 0xe0) == 0xe0) ? 1 : 0;  /* OS supports saving zmm registers */
#else
  return 0;
#endif
}
251 
/*
 * Report whether AVX512_BF16 can be used: requires support_avx512() and
 * EAX bit 5 of CPUID leaf 7, sub-leaf 1.  Returns 1 or 0; always 0 when
 * built with NO_AVX or NO_AVX512.
 */
int support_avx512_bf16(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);

  /* CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 supported or not */
  return ((eax & (1 << 5)) != 0) ? 1 : 0;
#else
  return 0;
#endif
}
268 
get_vendor(void)269 int get_vendor(void){
270   int eax, ebx, ecx, edx;
271   char vendor[13];
272 
273   cpuid(0, &eax, &ebx, &ecx, &edx);
274 
275   *(int *)(&vendor[0]) = ebx;
276   *(int *)(&vendor[4]) = edx;
277   *(int *)(&vendor[8]) = ecx;
278   vendor[12] = (char)0;
279 
280   if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
281   if (!strcmp(vendor, " UMC UMC UMC")) return VENDOR_UMC;
282   if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
283   if (!strcmp(vendor, "CyrixInstead")) return VENDOR_CYRIX;
284   if (!strcmp(vendor, "NexGenDriven")) return VENDOR_NEXGEN;
285   if (!strcmp(vendor, "CentaurHauls")) return VENDOR_CENTAUR;
286   if (!strcmp(vendor, "  Shanghai  ")) return VENDOR_ZHAOXIN;
287   if (!strcmp(vendor, "RiseRiseRise")) return VENDOR_RISE;
288   if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS;
289   if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA;
290   if (!strcmp(vendor, "Geode by NSC")) return VENDOR_NSC;
291   if (!strcmp(vendor, "HygonGenuine")) return VENDOR_HYGON;
292 
293   if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
294 
295   return VENDOR_UNKNOWN;
296 }
297 
get_cputype(int gettype)298 int get_cputype(int gettype){
299   int eax, ebx, ecx, edx;
300   int extend_family, family;
301   int extend_model, model;
302   int type, stepping;
303   int feature = 0;
304 
305   cpuid(1, &eax, &ebx, &ecx, &edx);
306 
307   switch (gettype) {
308   case GET_EXFAMILY :
309     return BITMASK(eax, 20, 0xff);
310   case GET_EXMODEL :
311     return BITMASK(eax, 16, 0x0f);
312   case GET_TYPE :
313     return BITMASK(eax, 12, 0x03);
314   case GET_FAMILY :
315     return BITMASK(eax,  8, 0x0f);
316   case GET_MODEL :
317     return BITMASK(eax,  4, 0x0f);
318   case GET_APICID :
319     return BITMASK(ebx, 24, 0x0f);
320   case GET_LCOUNT :
321     return BITMASK(ebx, 16, 0x0f);
322   case GET_CHUNKS :
323     return BITMASK(ebx,  8, 0x0f);
324   case GET_STEPPING :
325     return BITMASK(eax,  0, 0x0f);
326   case GET_BLANDID :
327     return BITMASK(ebx,  0, 0xff);
328   case GET_NUMSHARE :
329     if (have_cpuid() < 4) return 0;
330     cpuid(4, &eax, &ebx, &ecx, &edx);
331     return BITMASK(eax, 14, 0xfff);
332   case GET_NUMCORES :
333     if (have_cpuid() < 4) return 0;
334     cpuid(4, &eax, &ebx, &ecx, &edx);
335     return BITMASK(eax, 26, 0x3f);
336 
337   case GET_FEATURE :
338     if ((edx & (1 <<  3)) != 0) feature |= HAVE_PSE;
339     if ((edx & (1 << 15)) != 0) feature |= HAVE_CMOV;
340     if ((edx & (1 << 19)) != 0) feature |= HAVE_CFLUSH;
341     if ((edx & (1 << 23)) != 0) feature |= HAVE_MMX;
342     if ((edx & (1 << 25)) != 0) feature |= HAVE_SSE;
343     if ((edx & (1 << 26)) != 0) feature |= HAVE_SSE2;
344     if ((edx & (1 << 27)) != 0) {
345       if (BITMASK(ebx, 16, 0x0f) > 0) feature |= HAVE_HIT;
346     }
347     if ((ecx & (1 <<  0)) != 0) feature |= HAVE_SSE3;
348     if ((ecx & (1 <<  9)) != 0) feature |= HAVE_SSSE3;
349     if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1;
350     if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
351 #ifndef NO_AVX
352     if (support_avx()) feature |= HAVE_AVX;
353     if (support_avx2()) feature |= HAVE_AVX2;
354     if (support_avx512()) feature |= HAVE_AVX512VL;
355     if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
356     if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
357 #endif
358 
359     if (have_excpuid() >= 0x01) {
360       cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
361       if ((ecx & (1 <<  6)) != 0) feature |= HAVE_SSE4A;
362       if ((ecx & (1 <<  7)) != 0) feature |= HAVE_MISALIGNSSE;
363 #ifndef NO_AVX
364       if ((ecx & (1 <<  16)) != 0) feature |= HAVE_FMA4;
365 #endif
366       if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
367       if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
368     }
369 
370     if (have_excpuid() >= 0x1a) {
371       cpuid(0x8000001a, &eax, &ebx, &ecx, &edx);
372       if ((eax & (1 <<  0)) != 0) feature |= HAVE_128BITFPU;
373       if ((eax & (1 <<  1)) != 0) feature |= HAVE_FASTMOVU;
374     }
375 
376   }
377   return feature;
378 }
379 
get_cacheinfo(int type,cache_info_t * cacheinfo)380 int get_cacheinfo(int type, cache_info_t *cacheinfo){
381   int eax, ebx, ecx, edx, cpuid_level;
382   int info[15];
383   int i;
384   cache_info_t LC1, LD1, L2, L3,
385     ITB, DTB, LITB, LDTB,
386     L2ITB, L2DTB, L2LITB, L2LDTB;
387 
388   LC1.size    = 0; LC1.associative = 0; LC1.linesize = 0; LC1.shared = 0;
389   LD1.size    = 0; LD1.associative    = 0; LD1.linesize    = 0; LD1.shared    = 0;
390   L2.size     = 0; L2.associative     = 0; L2.linesize     = 0; L2.shared     = 0;
391   L3.size     = 0; L3.associative     = 0; L3.linesize     = 0; L3.shared     = 0;
392   ITB.size    = 0; ITB.associative    = 0; ITB.linesize    = 0; ITB.shared    = 0;
393   DTB.size    = 0; DTB.associative    = 0; DTB.linesize    = 0; DTB.shared    = 0;
394   LITB.size   = 0; LITB.associative   = 0; LITB.linesize   = 0; LITB.shared   = 0;
395   LDTB.size   = 0; LDTB.associative   = 0; LDTB.linesize   = 0; LDTB.shared   = 0;
396   L2ITB.size  = 0; L2ITB.associative  = 0; L2ITB.linesize  = 0; L2ITB.shared  = 0;
397   L2DTB.size  = 0; L2DTB.associative  = 0; L2DTB.linesize  = 0; L2DTB.shared  = 0;
398   L2LITB.size = 0; L2LITB.associative = 0; L2LITB.linesize = 0; L2LITB.shared = 0;
399   L2LDTB.size = 0; L2LDTB.associative = 0; L2LDTB.linesize = 0; L2LDTB.shared = 0;
400 
401   cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
402 
403   if (cpuid_level > 1) {
404     int numcalls =0 ;
405     cpuid(2, &eax, &ebx, &ecx, &edx);
406     numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
407     info[ 0] = BITMASK(eax,  8, 0xff);
408     info[ 1] = BITMASK(eax, 16, 0xff);
409     info[ 2] = BITMASK(eax, 24, 0xff);
410 
411     info[ 3] = BITMASK(ebx,  0, 0xff);
412     info[ 4] = BITMASK(ebx,  8, 0xff);
413     info[ 5] = BITMASK(ebx, 16, 0xff);
414     info[ 6] = BITMASK(ebx, 24, 0xff);
415 
416     info[ 7] = BITMASK(ecx,  0, 0xff);
417     info[ 8] = BITMASK(ecx,  8, 0xff);
418     info[ 9] = BITMASK(ecx, 16, 0xff);
419     info[10] = BITMASK(ecx, 24, 0xff);
420 
421     info[11] = BITMASK(edx,  0, 0xff);
422     info[12] = BITMASK(edx,  8, 0xff);
423     info[13] = BITMASK(edx, 16, 0xff);
424     info[14] = BITMASK(edx, 24, 0xff);
425 
426     for (i = 0; i < 15; i++){
427       switch (info[i]){
428 
429 	/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
430 
431       case 0x01 :
432 	ITB.size        =     4;
433 	ITB.associative =     4;
434 	ITB.linesize     =   32;
435 	break;
436       case 0x02 :
437 	LITB.size        = 4096;
438 	LITB.associative =    0;
439 	LITB.linesize    =    2;
440 	break;
441       case 0x03 :
442 	DTB.size        =     4;
443 	DTB.associative =     4;
444 	DTB.linesize     =   64;
445 	break;
446       case 0x04 :
447 	LDTB.size        = 4096;
448 	LDTB.associative =    4;
449 	LDTB.linesize    =    8;
450 	break;
451       case 0x05 :
452 	LDTB.size        = 4096;
453 	LDTB.associative =    4;
454 	LDTB.linesize    =   32;
455 	break;
456       case 0x06 :
457 	LC1.size        = 8;
458 	LC1.associative = 4;
459 	LC1.linesize    = 32;
460 	break;
461       case 0x08 :
462 	LC1.size        = 16;
463 	LC1.associative = 4;
464 	LC1.linesize    = 32;
465 	break;
466       case 0x09 :
467 	LC1.size        = 32;
468 	LC1.associative = 4;
469 	LC1.linesize    = 64;
470 	break;
471       case 0x0a :
472 	LD1.size        = 8;
473 	LD1.associative = 2;
474 	LD1.linesize    = 32;
475 	break;
476       case 0x0c :
477 	LD1.size        = 16;
478 	LD1.associative = 4;
479 	LD1.linesize    = 32;
480 	break;
481       case 0x0d :
482 	LD1.size        = 16;
483 	LD1.associative = 4;
484 	LD1.linesize    = 64;
485 	break;
486       case 0x0e :
487 	LD1.size        = 24;
488 	LD1.associative = 6;
489 	LD1.linesize    = 64;
490 	break;
491       case 0x10 :
492 	LD1.size        = 16;
493 	LD1.associative = 4;
494 	LD1.linesize    = 32;
495 	break;
496       case 0x15 :
497 	LC1.size        = 16;
498 	LC1.associative = 4;
499 	LC1.linesize    = 32;
500 	break;
501       case 0x1a :
502 	L2.size         = 96;
503 	L2.associative  = 6;
504 	L2.linesize     = 64;
505 	break;
506       case 0x21 :
507 	L2.size         = 256;
508 	L2.associative  = 8;
509 	L2.linesize     = 64;
510 	break;
511       case 0x22 :
512 	L3.size         = 512;
513 	L3.associative  = 4;
514 	L3.linesize     = 64;
515 	break;
516       case 0x23 :
517 	L3.size         = 1024;
518 	L3.associative  = 8;
519 	L3.linesize     = 64;
520 	break;
521       case 0x25 :
522 	L3.size         = 2048;
523 	L3.associative  = 8;
524 	L3.linesize     = 64;
525 	break;
526       case 0x29 :
527 	L3.size         = 4096;
528 	L3.associative  = 8;
529 	L3.linesize     = 64;
530 	break;
531       case 0x2c :
532 	LD1.size        = 32;
533 	LD1.associative = 8;
534 	LD1.linesize    = 64;
535 	break;
536       case 0x30 :
537 	LC1.size        = 32;
538 	LC1.associative = 8;
539 	LC1.linesize    = 64;
540 	break;
541       case 0x39 :
542 	L2.size         = 128;
543 	L2.associative  = 4;
544 	L2.linesize     = 64;
545 	break;
546       case 0x3a :
547 	L2.size         = 192;
548 	L2.associative  = 6;
549 	L2.linesize     = 64;
550 	break;
551       case 0x3b :
552 	L2.size         = 128;
553 	L2.associative  = 2;
554 	L2.linesize     = 64;
555 	break;
556       case 0x3c :
557 	L2.size         = 256;
558 	L2.associative  = 4;
559 	L2.linesize     = 64;
560 	break;
561       case 0x3d :
562 	L2.size         = 384;
563 	L2.associative  = 6;
564 	L2.linesize     = 64;
565 	break;
566       case 0x3e :
567 	L2.size         = 512;
568 	L2.associative  = 4;
569 	L2.linesize     = 64;
570 	break;
571       case 0x41 :
572 	L2.size         = 128;
573 	L2.associative  = 4;
574 	L2.linesize     = 32;
575 	break;
576       case 0x42 :
577 	L2.size         = 256;
578 	L2.associative  = 4;
579 	L2.linesize     = 32;
580 	break;
581       case 0x43 :
582 	L2.size         = 512;
583 	L2.associative  = 4;
584 	L2.linesize     = 32;
585 	break;
586       case 0x44 :
587 	L2.size         = 1024;
588 	L2.associative  = 4;
589 	L2.linesize     = 32;
590 	break;
591       case 0x45 :
592 	L2.size         = 2048;
593 	L2.associative  = 4;
594 	L2.linesize     = 32;
595 	break;
596       case 0x46 :
597 	L3.size         = 4096;
598 	L3.associative  = 4;
599 	L3.linesize     = 64;
600 	break;
601       case 0x47 :
602 	L3.size         = 8192;
603 	L3.associative  = 8;
604 	L3.linesize     = 64;
605 	break;
606       case 0x48 :
607 	L2.size         = 3184;
608 	L2.associative  = 12;
609 	L2.linesize     = 64;
610 	break;
611       case 0x49 :
612 	if ((get_cputype(GET_FAMILY) == 0x0f) && (get_cputype(GET_MODEL) == 0x06)) {
613 	  L3.size         = 4096;
614 	  L3.associative  = 16;
615 	  L3.linesize     = 64;
616 	} else {
617 	  L2.size         = 4096;
618 	  L2.associative  = 16;
619 	  L2.linesize     = 64;
620 	}
621 	break;
622       case 0x4a :
623 	L3.size         = 6144;
624 	L3.associative  = 12;
625 	L3.linesize     = 64;
626 	break;
627       case 0x4b :
628 	L3.size         = 8192;
629 	L3.associative  = 16;
630 	L3.linesize     = 64;
631 	break;
632       case 0x4c :
633 	L3.size         = 12280;
634 	L3.associative  = 12;
635 	L3.linesize     = 64;
636 	break;
637       case 0x4d :
638 	L3.size         = 16384;
639 	L3.associative  = 16;
640 	L3.linesize     = 64;
641 	break;
642       case 0x4e :
643 	L2.size         = 6144;
644 	L2.associative  = 24;
645 	L2.linesize     = 64;
646 	break;
647       case 0x4f :
648 	ITB.size         = 4;
649 	ITB.associative  = 0;
650 	ITB.linesize     = 32;
651 	break;
652       case 0x50 :
653 	ITB.size         = 4;
654 	ITB.associative  = 0;
655 	ITB.linesize     = 64;
656 	LITB.size        = 4096;
657 	LITB.associative = 0;
658 	LITB.linesize    = 64;
659 	LITB.shared      = 1;
660 	break;
661       case 0x51 :
662 	ITB.size        = 4;
663 	ITB.associative = 0;
664 	ITB.linesize     = 128;
665 	LITB.size        = 4096;
666 	LITB.associative = 0;
667 	LITB.linesize    = 128;
668 	LITB.shared      = 1;
669 	break;
670       case 0x52 :
671 	ITB.size         = 4;
672 	ITB.associative  = 0;
673 	ITB.linesize     = 256;
674 	LITB.size        = 4096;
675 	LITB.associative = 0;
676 	LITB.linesize    = 256;
677 	LITB.shared      = 1;
678 	break;
679       case 0x55 :
680 	LITB.size        = 4096;
681 	LITB.associative = 0;
682 	LITB.linesize    = 7;
683 	LITB.shared      = 1;
684 	break;
685       case 0x56 :
686 	LDTB.size        = 4096;
687 	LDTB.associative = 4;
688 	LDTB.linesize    = 16;
689 	break;
690       case 0x57 :
691 	LDTB.size        = 4096;
692 	LDTB.associative = 4;
693 	LDTB.linesize    = 16;
694 	break;
695       case 0x5b :
696 	DTB.size         = 4;
697 	DTB.associative  = 0;
698 	DTB.linesize     = 64;
699 	LDTB.size        = 4096;
700 	LDTB.associative = 0;
701 	LDTB.linesize    = 64;
702 	LDTB.shared      = 1;
703 	break;
704       case 0x5c :
705 	DTB.size         = 4;
706 	DTB.associative  = 0;
707 	DTB.linesize     = 128;
708 	LDTB.size        = 4096;
709 	LDTB.associative = 0;
710 	LDTB.linesize    = 128;
711 	LDTB.shared      = 1;
712 	break;
713       case 0x5d :
714 	DTB.size         = 4;
715 	DTB.associative  = 0;
716 	DTB.linesize     = 256;
717 	LDTB.size        = 4096;
718 	LDTB.associative = 0;
719 	LDTB.linesize    = 256;
720 	LDTB.shared      = 1;
721 	break;
722       case 0x60 :
723 	LD1.size        = 16;
724 	LD1.associative = 8;
725 	LD1.linesize    = 64;
726 	break;
727       case 0x63 :
728   	DTB.size        = 2048;
729   	DTB.associative = 4;
730   	DTB.linesize    = 32;
731   	LDTB.size       = 4096;
732   	LDTB.associative= 4;
733   	LDTB.linesize   = 32;
734 	break;
735       case 0x66 :
736 	LD1.size        = 8;
737 	LD1.associative = 4;
738 	LD1.linesize    = 64;
739 	break;
740       case 0x67 :
741 	LD1.size        = 16;
742 	LD1.associative = 4;
743 	LD1.linesize    = 64;
744 	break;
745       case 0x68 :
746 	LD1.size        = 32;
747 	LD1.associative = 4;
748 	LD1.linesize    = 64;
749 	break;
750       case 0x70 :
751 	LC1.size        = 12;
752 	LC1.associative = 8;
753 	break;
754       case 0x71 :
755 	LC1.size        = 16;
756 	LC1.associative = 8;
757 	break;
758       case 0x72 :
759 	LC1.size        = 32;
760 	LC1.associative = 8;
761 	break;
762       case 0x73 :
763 	LC1.size        = 64;
764 	LC1.associative = 8;
765 	break;
766       case 0x76 :
767   	ITB.size        = 2048;
768   	ITB.associative = 0;
769   	ITB.linesize    = 8;
770   	LITB.size       = 4096;
771   	LITB.associative= 0;
772   	LITB.linesize   = 8;
773 	break;
774       case 0x77 :
775 	LC1.size        = 16;
776 	LC1.associative = 4;
777 	LC1.linesize    = 64;
778 	break;
779       case 0x78 :
780 	L2.size        = 1024;
781 	L2.associative = 4;
782 	L2.linesize    = 64;
783 	break;
784       case 0x79 :
785 	L2.size         = 128;
786 	L2.associative  = 8;
787 	L2.linesize     = 64;
788 	break;
789       case 0x7a :
790 	L2.size         = 256;
791 	L2.associative  = 8;
792 	L2.linesize     = 64;
793 	break;
794       case 0x7b :
795 	L2.size         = 512;
796 	L2.associative  = 8;
797 	L2.linesize     = 64;
798 	break;
799       case 0x7c :
800 	L2.size         = 1024;
801 	L2.associative  = 8;
802 	L2.linesize     = 64;
803 	break;
804       case 0x7d :
805 	L2.size         = 2048;
806 	L2.associative  = 8;
807 	L2.linesize     = 64;
808 	break;
809       case 0x7e :
810 	L2.size         = 256;
811 	L2.associative  = 8;
812 	L2.linesize     = 128;
813 	break;
814       case 0x7f :
815 	L2.size         = 512;
816 	L2.associative  = 2;
817 	L2.linesize     = 64;
818 	break;
819       case 0x81 :
820 	L2.size         = 128;
821 	L2.associative  = 8;
822 	L2.linesize     = 32;
823 	break;
824       case 0x82 :
825 	L2.size         = 256;
826 	L2.associative  = 8;
827 	L2.linesize     = 32;
828 	break;
829       case 0x83 :
830 	L2.size         = 512;
831 	L2.associative  = 8;
832 	L2.linesize     = 32;
833 	break;
834       case 0x84 :
835 	L2.size         = 1024;
836 	L2.associative  = 8;
837 	L2.linesize     = 32;
838 	break;
839       case 0x85 :
840 	L2.size         = 2048;
841 	L2.associative  = 8;
842 	L2.linesize     = 32;
843 	break;
844       case 0x86 :
845 	L2.size         = 512;
846 	L2.associative  = 4;
847 	L2.linesize     = 64;
848 	break;
849       case 0x87 :
850 	L2.size         = 1024;
851 	L2.associative  = 8;
852 	L2.linesize     = 64;
853 	break;
854       case 0x88 :
855 	L3.size         = 2048;
856 	L3.associative  = 4;
857 	L3.linesize     = 64;
858 	break;
859       case 0x89 :
860 	L3.size         = 4096;
861 	L3.associative  = 4;
862 	L3.linesize     = 64;
863 	break;
864       case 0x8a :
865 	L3.size         = 8192;
866 	L3.associative  = 4;
867 	L3.linesize     = 64;
868 	break;
869       case 0x8d :
870 	L3.size         = 3096;
871 	L3.associative  = 12;
872 	L3.linesize     = 128;
873 	break;
874       case 0x90 :
875 	ITB.size        = 4;
876 	ITB.associative = 0;
877 	ITB.linesize    = 64;
878 	break;
879       case 0x96 :
880 	DTB.size        = 4;
881 	DTB.associative = 0;
882 	DTB.linesize    = 32;
883 	break;
884       case 0x9b :
885 	L2DTB.size        = 4;
886 	L2DTB.associative = 0;
887 	L2DTB.linesize    = 96;
888 	break;
889       case 0xb0 :
890 	ITB.size        = 4;
891 	ITB.associative = 4;
892 	ITB.linesize    = 128;
893 	break;
894       case 0xb1 :
895 	LITB.size        = 4096;
896 	LITB.associative = 4;
897 	LITB.linesize    = 4;
898 	break;
899       case 0xb2 :
900 	ITB.size        = 4;
901 	ITB.associative = 4;
902 	ITB.linesize    = 64;
903 	break;
904       case 0xb3 :
905 	DTB.size        = 4;
906 	DTB.associative = 4;
907 	DTB.linesize    = 128;
908 	break;
909       case 0xb4 :
910 	DTB.size        = 4;
911 	DTB.associative = 4;
912 	DTB.linesize    = 256;
913 	break;
914       case 0xba :
915 	DTB.size        = 4;
916 	DTB.associative = 4;
917 	DTB.linesize    = 64;
918 	break;
919       case 0xd0 :
920 	L3.size         = 512;
921 	L3.associative  = 4;
922 	L3.linesize     = 64;
923 	break;
924       case 0xd1 :
925 	L3.size         = 1024;
926 	L3.associative  = 4;
927 	L3.linesize     = 64;
928 	break;
929       case 0xd2 :
930 	L3.size         = 2048;
931 	L3.associative  = 4;
932 	L3.linesize     = 64;
933 	break;
934       case 0xd6 :
935 	L3.size         = 1024;
936 	L3.associative  = 8;
937 	L3.linesize     = 64;
938 	break;
939       case 0xd7 :
940 	L3.size         = 2048;
941 	L3.associative  = 8;
942 	L3.linesize     = 64;
943 	break;
944       case 0xd8 :
945 	L3.size         = 4096;
946 	L3.associative  = 8;
947 	L3.linesize     = 64;
948 	break;
949       case 0xdc :
950 	L3.size         = 2048;
951 	L3.associative  = 12;
952 	L3.linesize     = 64;
953 	break;
954       case 0xdd :
955 	L3.size         = 4096;
956 	L3.associative  = 12;
957 	L3.linesize     = 64;
958 	break;
959       case 0xde :
960 	L3.size         = 8192;
961 	L3.associative  = 12;
962 	L3.linesize     = 64;
963 	break;
964       case 0xe2 :
965 	L3.size         = 2048;
966 	L3.associative  = 16;
967 	L3.linesize     = 64;
968 	break;
969       case 0xe3 :
970 	L3.size         = 4096;
971 	L3.associative  = 16;
972 	L3.linesize     = 64;
973 	break;
974       case 0xe4 :
975 	L3.size         = 8192;
976 	L3.associative  = 16;
977 	L3.linesize     = 64;
978 	break;
979       }
980     }
981   }
982 
983   if (get_vendor() == VENDOR_INTEL) {
984       if(LD1.size<=0 || LC1.size<=0){
985 	//If we didn't detect L1 correctly before,
986 	int count;
987 	for (count=0;count <4;count++) {
988 	cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
989         switch (eax &0x1f) {
990         case 0:
991           continue;
992           case 1:
993           case 3:
994           {
995             switch ((eax >>5) &0x07)
996             {
997             case 1:
998             {
999 //            fprintf(stderr,"L1 data cache...\n");
1000             int sets = ecx+1;
1001             int lines = (ebx & 0x0fff) +1;
1002             ebx>>=12;
1003             int part = (ebx&0x03ff)+1;
1004             ebx >>=10;
1005             int assoc = (ebx&0x03ff)+1;
1006             LD1.size = (assoc*part*lines*sets)/1024;
1007             LD1.associative = assoc;
1008             LD1.linesize= lines;
1009             break;
1010             }
1011             default:
1012               break;
1013            }
1014           break;
1015           }
1016          case 2:
1017           {
1018             switch ((eax >>5) &0x07)
1019             {
1020             case 1:
1021             {
1022 //            fprintf(stderr,"L1 instruction cache...\n");
1023             int sets = ecx+1;
1024             int lines = (ebx & 0x0fff) +1;
1025             ebx>>=12;
1026             int part = (ebx&0x03ff)+1;
1027             ebx >>=10;
1028             int assoc = (ebx&0x03ff)+1;
1029             LC1.size = (assoc*part*lines*sets)/1024;
1030             LC1.associative = assoc;
1031             LC1.linesize= lines;
1032             break;
1033             }
1034             default:
1035               break;
1036            }
1037           break;
1038 
1039           }
1040           default:
1041           break;
1042         }
1043       }
1044     }
1045     cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
1046     if (cpuid_level >= 0x80000006) {
1047       if(L2.size<=0){
1048 	//If we didn't detect L2 correctly before,
1049 	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
1050 
1051 	L2.size         = BITMASK(ecx, 16, 0xffff);
1052 	L2.associative  = BITMASK(ecx, 12, 0x0f);
1053 
1054 	switch (L2.associative){
1055 	case 0x06:
1056 	  L2.associative = 8;
1057 	  break;
1058 	case 0x08:
1059 	  L2.associative = 16;
1060 	  break;
1061 	}
1062 
1063 	L2.linesize     = BITMASK(ecx,  0, 0xff);
1064       }
1065     }
1066   }
1067 
1068   if ((get_vendor() == VENDOR_AMD) ||
1069       (get_vendor() == VENDOR_HYGON) ||
1070       (get_vendor() == VENDOR_CENTAUR) ||
1071       (get_vendor() == VENDOR_ZHAOXIN)) {
1072     cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
1073 
1074     LDTB.size        = 4096;
1075     LDTB.associative = BITMASK(eax, 24, 0xff);
1076     if (LDTB.associative == 0xff) LDTB.associative = 0;
1077     LDTB.linesize    = BITMASK(eax, 16, 0xff);
1078 
1079     LITB.size        = 4096;
1080     LITB.associative = BITMASK(eax,  8, 0xff);
1081     if (LITB.associative == 0xff) LITB.associative = 0;
1082     LITB.linesize    = BITMASK(eax,  0, 0xff);
1083 
1084     DTB.size        = 4;
1085     DTB.associative = BITMASK(ebx, 24, 0xff);
1086     if (DTB.associative == 0xff) DTB.associative = 0;
1087     DTB.linesize    = BITMASK(ebx, 16, 0xff);
1088 
1089     ITB.size        = 4;
1090     ITB.associative = BITMASK(ebx,  8, 0xff);
1091     if (ITB.associative == 0xff) ITB.associative = 0;
1092     ITB.linesize    = BITMASK(ebx,  0, 0xff);
1093 
1094     LD1.size        = BITMASK(ecx, 24, 0xff);
1095     LD1.associative = BITMASK(ecx, 16, 0xff);
1096     if (LD1.associative == 0xff) LD1.associative = 0;
1097     LD1.linesize    = BITMASK(ecx,  0, 0xff);
1098 
1099     LC1.size        = BITMASK(ecx, 24, 0xff);
1100     LC1.associative = BITMASK(ecx, 16, 0xff);
1101     if (LC1.associative == 0xff) LC1.associative = 0;
1102     LC1.linesize    = BITMASK(ecx,  0, 0xff);
1103 
1104     cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
1105 
1106     L2LDTB.size        = 4096;
1107     L2LDTB.associative = BITMASK(eax, 24, 0xff);
1108     if (L2LDTB.associative == 0xff) L2LDTB.associative = 0;
1109     L2LDTB.linesize    = BITMASK(eax, 16, 0xff);
1110 
1111     L2LITB.size        = 4096;
1112     L2LITB.associative = BITMASK(eax,  8, 0xff);
1113     if (L2LITB.associative == 0xff) L2LITB.associative = 0;
1114     L2LITB.linesize    = BITMASK(eax,  0, 0xff);
1115 
1116     L2DTB.size        = 4;
1117     L2DTB.associative = BITMASK(ebx, 24, 0xff);
1118     if (L2DTB.associative == 0xff) L2DTB.associative = 0;
1119     L2DTB.linesize    = BITMASK(ebx, 16, 0xff);
1120 
1121     L2ITB.size        = 4;
1122     L2ITB.associative = BITMASK(ebx,  8, 0xff);
1123     if (L2ITB.associative == 0xff) L2ITB.associative = 0;
1124     L2ITB.linesize    = BITMASK(ebx,  0, 0xff);
1125 
1126     if(L2.size <= 0){
1127       //If we didn't detect L2 correctly before,
1128       L2.size        = BITMASK(ecx, 16, 0xffff);
1129       L2.associative = BITMASK(ecx, 12, 0xf);
1130       switch (L2.associative){
1131       case 0x06:
1132 	L2.associative = 8;
1133 	break;
1134       case 0x08:
1135 	L2.associative = 16;
1136 	break;
1137       }
1138 
1139       if (L2.associative == 0xff) L2.associative = 0;
1140       L2.linesize    = BITMASK(ecx,  0, 0xff);
1141     }
1142 
1143     L3.size        = BITMASK(edx, 18, 0x3fff) * 512;
1144     L3.associative = BITMASK(edx, 12, 0xf);
1145     if (L3.associative == 0xff) L2.associative = 0;
1146     L3.linesize    = BITMASK(edx,  0, 0xff);
1147 
1148   }
1149 
1150     switch (type) {
1151 
1152     case CACHE_INFO_L1_I :
1153       *cacheinfo = LC1;
1154       break;
1155     case CACHE_INFO_L1_D :
1156       *cacheinfo = LD1;
1157       break;
1158     case CACHE_INFO_L2 :
1159       *cacheinfo = L2;
1160       break;
1161     case CACHE_INFO_L3 :
1162       *cacheinfo = L3;
1163       break;
1164     case CACHE_INFO_L1_DTB :
1165       *cacheinfo = DTB;
1166       break;
1167     case CACHE_INFO_L1_ITB :
1168       *cacheinfo = ITB;
1169       break;
1170     case CACHE_INFO_L1_LDTB :
1171       *cacheinfo = LDTB;
1172       break;
1173     case CACHE_INFO_L1_LITB :
1174       *cacheinfo = LITB;
1175       break;
1176     case CACHE_INFO_L2_DTB :
1177       *cacheinfo = L2DTB;
1178       break;
1179     case CACHE_INFO_L2_ITB :
1180       *cacheinfo = L2ITB;
1181       break;
1182     case CACHE_INFO_L2_LDTB :
1183       *cacheinfo = L2LDTB;
1184       break;
1185     case CACHE_INFO_L2_LITB :
1186       *cacheinfo = L2LITB;
1187       break;
1188     }
1189   return 0;
1190 }
1191 
get_cpuname(void)1192 int get_cpuname(void){
1193 
1194   int family, exfamily, model, vendor, exmodel, stepping;
1195 
1196   if (!have_cpuid()) return CPUTYPE_80386;
1197 
1198   family   = get_cputype(GET_FAMILY);
1199   exfamily = get_cputype(GET_EXFAMILY);
1200   model    = get_cputype(GET_MODEL);
1201   exmodel  = get_cputype(GET_EXMODEL);
1202   stepping = get_cputype(GET_STEPPING);
1203 
1204   vendor = get_vendor();
1205 
1206   if (vendor == VENDOR_INTEL){
1207     switch (family) {
1208     case 0x4:
1209       return CPUTYPE_80486;
1210     case 0x5:
1211       return CPUTYPE_PENTIUM;
1212     case 0x6:
1213       switch (exmodel) {
1214       case 0:
1215 	switch (model) {
1216 	case  1:
1217 	case  3:
1218 	case  5:
1219 	case  6:
1220 #if defined(__x86_64__) || defined(__amd64__)
1221 	  return CPUTYPE_CORE2;
1222 #else
1223 	  return CPUTYPE_PENTIUM2;
1224 #endif
1225 	case  7:
1226 	case  8:
1227 	case 10:
1228 	case 11:
1229 	  return CPUTYPE_PENTIUM3;
1230 	case  9:
1231 	case 13:
1232 	case 14:
1233 	  return CPUTYPE_PENTIUMM;
1234 	case 15:
1235 	  return CPUTYPE_CORE2;
1236 	}
1237 	break;
1238       case 1:  // family 6 exmodel 1
1239 	switch (model) {
1240 	case  6:
1241 	  return CPUTYPE_CORE2;
1242 	case  7:
1243 	  return CPUTYPE_PENRYN;
1244 	case 10:
1245 	case 11:
1246 	case 14:
1247 	case 15:
1248 	  return CPUTYPE_NEHALEM;
1249 	case 12:
1250 	  return CPUTYPE_ATOM;
1251 	case 13:
1252 	  return CPUTYPE_DUNNINGTON;
1253 	}
1254 	break;
1255       case  2: // family 6 exmodel 2
1256 	switch (model) {
1257 	case 5:
1258 	  //Intel Core (Clarkdale) / Core (Arrandale)
1259 	  // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
1260 	  // Xeon (Clarkdale), 32nm
1261 	  return CPUTYPE_NEHALEM;
1262 	case 10:
1263 	  //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
1264 	  if(support_avx())
1265 	    return CPUTYPE_SANDYBRIDGE;
1266 	  else
1267 	    return CPUTYPE_NEHALEM; //OS doesn't support AVX
1268 	case 12:
1269 	  //Xeon Processor 5600 (Westmere-EP)
1270 	  return CPUTYPE_NEHALEM;
1271 	case 13:
1272 	  //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
1273 	  if(support_avx())
1274 	    return CPUTYPE_SANDYBRIDGE;
1275 	  else
1276 	    return CPUTYPE_NEHALEM;
1277 	case 14:
1278 	  // Xeon E7540
1279 	case 15:
1280 	  //Xeon Processor E7 (Westmere-EX)
1281 	  return CPUTYPE_NEHALEM;
1282 	}
1283 	break;
1284       case 3: // family 6 exmodel 3
1285 	switch (model) {
1286 	case  7:
1287 	    // Bay Trail
1288 	    return CPUTYPE_ATOM;
1289 	case 10:
1290         case 14:
1291 	  // Ivy Bridge
1292 	  if(support_avx())
1293 	    return CPUTYPE_SANDYBRIDGE;
1294 	  else
1295 	    return CPUTYPE_NEHALEM;
1296         case 12:
1297 	case 15:
1298           if(support_avx2())
1299             return CPUTYPE_HASWELL;
1300           if(support_avx())
1301 	    return CPUTYPE_SANDYBRIDGE;
1302           else
1303 	    return CPUTYPE_NEHALEM;
1304 	case 13:
1305 	  //Broadwell
1306           if(support_avx2())
1307             return CPUTYPE_HASWELL;
1308           if(support_avx())
1309 	    return CPUTYPE_SANDYBRIDGE;
1310           else
1311 	    return CPUTYPE_NEHALEM;
1312 	}
1313 	break;
1314       case 4: // family 6 exmodel 4
1315         switch (model) {
1316         case 5:
1317 	case 6:
1318           if(support_avx2())
1319             return CPUTYPE_HASWELL;
1320           if(support_avx())
1321 	    return CPUTYPE_SANDYBRIDGE;
1322           else
1323 	    return CPUTYPE_NEHALEM;
1324 	case 7:
1325 	case 15:
1326 	  //Broadwell
1327           if(support_avx2())
1328             return CPUTYPE_HASWELL;
1329           if(support_avx())
1330 	    return CPUTYPE_SANDYBRIDGE;
1331           else
1332 	    return CPUTYPE_NEHALEM;
1333 	case 14:
1334 	  //Skylake
1335           if(support_avx2())
1336             return CPUTYPE_HASWELL;
1337           if(support_avx())
1338 	    return CPUTYPE_SANDYBRIDGE;
1339           else
1340 	    return CPUTYPE_NEHALEM;
1341 	case 12:
1342 	  // Braswell
1343 	case 13:
1344 	  // Avoton
1345 	    return CPUTYPE_NEHALEM;
1346         }
1347         break;
1348       case 5:  // family 6 exmodel 5
1349         switch (model) {
1350 	case 6:
1351 	  //Broadwell
1352           if(support_avx2())
1353             return CPUTYPE_HASWELL;
1354           if(support_avx())
1355 	    return CPUTYPE_SANDYBRIDGE;
1356           else
1357 	    return CPUTYPE_NEHALEM;
1358 	case 5:
1359 	  // Skylake X
1360           if(support_avx512_bf16())
1361             return CPUTYPE_COOPERLAKE;
1362           if(support_avx512())
1363             return CPUTYPE_SKYLAKEX;
1364           if(support_avx2())
1365             return CPUTYPE_HASWELL;
1366           if(support_avx())
1367 	    return CPUTYPE_SANDYBRIDGE;
1368 	  else
1369 	  return CPUTYPE_NEHALEM;
1370         case 14:
1371 	  // Skylake
1372           if(support_avx2())
1373             return CPUTYPE_HASWELL;
1374           if(support_avx())
1375 	    return CPUTYPE_SANDYBRIDGE;
1376           else
1377 	    return CPUTYPE_NEHALEM;
1378 	case 7:
1379 	    // Xeon Phi Knights Landing
1380           if(support_avx2())
1381             return CPUTYPE_HASWELL;
1382           if(support_avx())
1383 	    return CPUTYPE_SANDYBRIDGE;
1384           else
1385 	    return CPUTYPE_NEHALEM;
1386 	case 12:
1387 	    // Apollo Lake
1388 	case 15:
1389 	    // Denverton
1390 	    return CPUTYPE_NEHALEM;
1391 	}
1392 	break;
1393       case 6:  // family 6 exmodel 6
1394         switch (model) {
1395         case 6: // Cannon Lake
1396           if(support_avx512())
1397             return CPUTYPE_SKYLAKEX;
1398           if(support_avx2())
1399             return CPUTYPE_HASWELL;
1400           if(support_avx())
1401 	    return CPUTYPE_SANDYBRIDGE;
1402 	  else
1403 	  return CPUTYPE_NEHALEM;
1404 	case 10: // Ice Lake SP
1405 	  if(support_avx512_bf16())
1406             return CPUTYPE_COOPERLAKE;
1407           if(support_avx512())
1408             return CPUTYPE_SKYLAKEX;
1409           if(support_avx2())
1410             return CPUTYPE_HASWELL;
1411           if(support_avx())
1412 	    return CPUTYPE_SANDYBRIDGE;
1413 	  else
1414 	  return CPUTYPE_NEHALEM;
1415         }
1416       break;
1417       case 7: // family 6 exmodel 7
1418         switch (model) {
1419 	case 10: // Goldmont Plus
1420 	    return CPUTYPE_NEHALEM;
1421         case 14: // Ice Lake
1422           if(support_avx512())
1423             return CPUTYPE_SKYLAKEX;
1424           if(support_avx2())
1425             return CPUTYPE_HASWELL;
1426           if(support_avx())
1427 	    return CPUTYPE_SANDYBRIDGE;
1428 	  else
1429 	  return CPUTYPE_NEHALEM;
1430         }
1431       break;
1432       case 9:
1433       case 8:
1434         switch (model) {
1435         case 12: // Tiger Lake
1436           if(support_avx512())
1437             return CPUTYPE_SKYLAKEX;
1438           if(support_avx2())
1439             return CPUTYPE_HASWELL;
1440           if(support_avx())
1441             return CPUTYPE_SANDYBRIDGE;
1442           else
1443           return CPUTYPE_NEHALEM;
1444 	case 14: // Kaby Lake and refreshes
1445           if(support_avx2())
1446             return CPUTYPE_HASWELL;
1447           if(support_avx())
1448 	    return CPUTYPE_SANDYBRIDGE;
1449           else
1450 	    return CPUTYPE_NEHALEM;
1451     }
1452       case 10: //family 6 exmodel 10
1453         switch (model) {
1454     case 5: // Comet Lake H and S
1455     case 6: // Comet Lake U
1456           if(support_avx2())
1457             return CPUTYPE_HASWELL;
1458           if(support_avx())
1459         return CPUTYPE_SANDYBRIDGE;
1460           else
1461         return CPUTYPE_NEHALEM;
1462     case 7: // Rocket Lake
1463 	  if(support_avx512())
1464             return CPUTYPE_SKYLAKEX;
1465           if(support_avx2())
1466             return CPUTYPE_HASWELL;
1467           if(support_avx())
1468 	    return CPUTYPE_SANDYBRIDGE;
1469 	  else
1470 	  return CPUTYPE_NEHALEM;
1471 	}
1472 	break;
1473       }
1474       break;
1475     case 0x7:
1476       return CPUTYPE_ITANIUM;
1477     case 0xf:
1478       switch (exfamily) {
1479       case 0 :
1480 	return CPUTYPE_PENTIUM4;
1481       case 1 :
1482 	return CPUTYPE_ITANIUM;
1483       }
1484       break;
1485     }
1486     return CPUTYPE_INTEL_UNKNOWN;
1487   }
1488 
1489   if (vendor == VENDOR_AMD){
1490     switch (family) {
1491     case 0x4:
1492       return CPUTYPE_AMD5X86;
1493     case 0x5:
1494       return CPUTYPE_AMDK6;
1495     case 0x6:
1496 #if defined(__x86_64__) || defined(__amd64__)
1497       return CPUTYPE_BARCELONA;
1498 #else
1499       return CPUTYPE_ATHLON;
1500 #endif
1501     case 0xf:
1502       switch (exfamily) {
1503       case  0:
1504       case  2:
1505 	return CPUTYPE_OPTERON;
1506       case  1:
1507       case  3:
1508 //      case  7:
1509 //      case 10:
1510 	return CPUTYPE_BARCELONA;
1511       case  5:
1512       case  7:
1513 	return CPUTYPE_BOBCAT;
1514       case  6:
1515 	switch (model) {
1516 	case 1:
1517 	  //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
1518 	  if(support_avx())
1519 	    return CPUTYPE_BULLDOZER;
1520 	  else
1521 	    return CPUTYPE_BARCELONA; //OS don't support AVX.
1522 	case 2: //AMD Piledriver
1523 	case 3: //AMD Richland
1524 	  if(support_avx())
1525 	    return CPUTYPE_PILEDRIVER;
1526 	  else
1527 	    return CPUTYPE_BARCELONA; //OS don't support AVX.
1528 	case 5: // New EXCAVATOR CPUS
1529 	  if(support_avx())
1530 	    return CPUTYPE_EXCAVATOR;
1531 	  else
1532 	    return CPUTYPE_BARCELONA; //OS don't support AVX.
1533 	case 0:
1534         case 8:
1535 	  switch(exmodel){
1536 	  case 1: //AMD Trinity
1537 	    if(support_avx())
1538 	      return CPUTYPE_PILEDRIVER;
1539 	    else
1540 	      return CPUTYPE_BARCELONA; //OS don't support AVX.
1541 	  case 3:
1542 	    if(support_avx())
1543 	      return CPUTYPE_STEAMROLLER;
1544 	    else
1545 	      return CPUTYPE_BARCELONA; //OS don't support AVX.
1546 
1547 	  case 6:
1548 	    if(support_avx())
1549 	      return CPUTYPE_EXCAVATOR;
1550 	    else
1551 	      return CPUTYPE_BARCELONA; //OS don't support AVX.
1552 	  }
1553 	  break;
1554 	}
1555 	break;
1556       case 8:
1557 	switch (model) {
1558 	case 1:
1559 	  // AMD Ryzen
1560 	case 8:
1561 	  // AMD Ryzen2
1562 	default:
1563 	  // Matisse/Renoir and other recent Ryzen2
1564 	  if(support_avx())
1565 #ifndef NO_AVX2
1566 	    return CPUTYPE_ZEN;
1567 #else
1568 	    return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
1569 #endif
1570 	  else
1571 	    return CPUTYPE_BARCELONA;
1572         }
1573 	break;
1574       case 10: // Zen3
1575 	if(support_avx())
1576 #ifndef NO_AVX2
1577 	    return CPUTYPE_ZEN;
1578 #else
1579 	    return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
1580 #endif
1581 	  else
1582 	    return CPUTYPE_BARCELONA;
1583       }
1584       break;
1585     }
1586     return CPUTYPE_AMD_UNKNOWN;
1587   }
1588 
1589   if (vendor == VENDOR_HYGON){
1590     switch (family) {
1591     case 0xf:
1592       switch (exfamily) {
1593       case 9:
1594           //Hygon Dhyana
1595 	  if(support_avx())
1596 #ifndef NO_AVX2
1597 	    return CPUTYPE_ZEN;
1598 #else
1599 	    return CPUTYPE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
1600 #endif
1601 	  else
1602 	    return CPUTYPE_BARCELONA;
1603         }
1604       break;
1605     }
1606     return CPUTYPE_HYGON_UNKNOWN;
1607   }
1608 
1609   if (vendor == VENDOR_CYRIX){
1610     switch (family) {
1611     case 0x4:
1612       return CPUTYPE_CYRIX5X86;
1613     case 0x5:
1614       return CPUTYPE_CYRIXM1;
1615     case 0x6:
1616       return CPUTYPE_CYRIXM2;
1617     }
1618     return CPUTYPE_CYRIX_UNKNOWN;
1619   }
1620 
1621   if (vendor == VENDOR_NEXGEN){
1622     switch (family) {
1623     case 0x5:
1624       return CPUTYPE_NEXGENNX586;
1625     }
1626     return CPUTYPE_NEXGEN_UNKNOWN;
1627   }
1628 
1629   if (vendor == VENDOR_CENTAUR){
1630     switch (family) {
1631     case 0x5:
1632       return CPUTYPE_CENTAURC6;
1633     case 0x6:
1634       if (model == 0xf && stepping < 0xe)
1635         return CPUTYPE_NANO;
1636       return CPUTYPE_NEHALEM;
1637     default:
1638       if (family >= 0x7)
1639         return CPUTYPE_NEHALEM;
1640       else
1641         return CPUTYPE_VIAC3;
1642     }
1643   }
1644 
1645   if (vendor == VENDOR_ZHAOXIN){
1646     return CPUTYPE_NEHALEM;
1647   }
1648 
1649   if (vendor == VENDOR_RISE){
1650     switch (family) {
1651     case 0x5:
1652       return CPUTYPE_RISEMP6;
1653     }
1654     return CPUTYPE_RISE_UNKNOWN;
1655   }
1656 
1657   if (vendor == VENDOR_SIS){
1658     switch (family) {
1659     case 0x5:
1660       return CPUTYPE_SYS55X;
1661     }
1662     return CPUTYPE_SIS_UNKNOWN;
1663   }
1664 
1665   if (vendor == VENDOR_TRANSMETA){
1666     switch (family) {
1667     case 0x5:
1668       return CPUTYPE_CRUSOETM3X;
1669     }
1670     return CPUTYPE_TRANSMETA_UNKNOWN;
1671   }
1672 
1673   if (vendor == VENDOR_NSC){
1674     switch (family) {
1675     case 0x5:
1676       return CPUTYPE_NSGEODE;
1677     }
1678     return CPUTYPE_NSC_UNKNOWN;
1679   }
1680 
1681   return CPUTYPE_UNKNOWN;
1682 }
1683 
/* Upper-case CPU type names.  get_cpunamechar() and get_cpuconfig() index this
   table with the value returned by get_cpuname(), so the order of the entries
   is significant.  The number in front of each row is the index of its first
   entry. */
static char *cpuname[] = {
  /*  0 */ "UNKNOWN", "INTEL_UNKNOWN", "UMC_UNKNOWN", "AMD_UNKNOWN",
  /*  4 */ "CYRIX_UNKNOWN", "NEXGEN_UNKNOWN", "CENTAUR_UNKNOWN", "RISE_UNKNOWN",
  /*  8 */ "SIS_UNKNOWN", "TRANSMETA_UNKNOWN", "NSC_UNKNOWN",
  /* 11 */ "80386", "80486", "PENTIUM", "PENTIUM2", "PENTIUM3", "PENTIUMM", "PENTIUM4",
  /* 18 */ "CORE2", "PENRYN", "DUNNINGTON", "NEHALEM", "ATOM", "ITANIUM", "ITANIUM2",
  /* 25 */ "5X86", "K6", "ATHLON", "DURON", "OPTERON", "BARCELONA", "SHANGHAI", "ISTANBUL",
  /* 33 */ "CYRIX5X86", "CYRIXM1", "CYRIXM2",
  /* 36 */ "NEXGENNX586", "CENTAURC6", "RISEMP6", "SYS55X", "TM3X00", "NSGEODE",
  /* 42 */ "VIAC3", "NANO",
  /* 44 */ "SANDYBRIDGE", "BOBCAT", "BULLDOZER", "PILEDRIVER", "HASWELL",
  /* 49 */ "STEAMROLLER", "EXCAVATOR", "ZEN", "SKYLAKEX", "DHYANA", "COOPERLAKE"
};
1741 
/* Lower-case CPU type names.  get_lower_cpunamechar() indexes this table with
   the value returned by get_cpuname(), exactly as get_cpunamechar() indexes
   cpuname[], so both tables must list the same CPU types in the same order.
   The "viac3" entry keeps "nano" and everything after it on the same index as
   in cpuname[]; without it the last get_cpuname() value would index past the
   end of this table. */
static char *lowercpuname[] = {
  "unknown",
  "intel_unknown",
  "umc_unknown",
  "amd_unknown",
  "cyrix_unknown",
  "nexgen_unknown",
  "centaur_unknown",
  "rise_unknown",
  "sis_unknown",
  "transmeta_unknown",
  "nsc_unknown",
  "80386",
  "80486",
  "pentium",
  "pentium2",
  "pentium3",
  "pentiumm",
  "pentium4",
  "core2",
  "penryn",
  "dunnington",
  "nehalem",
  "atom",
  "itanium",
  "itanium2",
  "5x86",
  "k6",
  "athlon",
  "duron",
  "opteron",
  "barcelona",
  "shanghai",
  "istanbul",
  "cyrix5x86",
  "cyrixm1",
  "cyrixm2",
  "nexgennx586",
  "centaurc6",
  "risemp6",
  "sys55x",
  "tms3x00",
  "nsgeode",
  "viac3",
  "nano",
  "sandybridge",
  "bobcat",
  "bulldozer",
  "piledriver",
  "haswell",
  "steamroller",
  "excavator",
  "zen",
  "skylakex",
  "dhyana",
  "cooperlake"
};
1798 
/* Upper-case core names.  Presumably indexed by the CORE_* value returned by
   get_coretype() -- the lookup itself is not in this part of the file, so
   confirm before reordering.  The number in front of each row is the index of
   its first entry. */
static char *corename[] = {
  /*  0 */ "UNKNOWN", "80486", "P5", "P6",
  /*  4 */ "KATMAI", "COPPERMINE", "NORTHWOOD", "PRESCOTT", "BANIAS",
  /*  9 */ "ATHLON", "OPTERON", "BARCELONA",
  /* 12 */ "VIAC3", "YONAH", "CORE2", "PENRYN", "DUNNINGTON",
  /* 17 */ "NEHALEM", "ATOM", "NANO", "SANDYBRIDGE",
  /* 21 */ "BOBCAT", "BULLDOZER", "PILEDRIVER", "HASWELL",
  /* 25 */ "STEAMROLLER", "EXCAVATOR", "ZEN", "SKYLAKEX", "DHYANA", "COOPERLAKE"
};
1832 
/* Lower-case core names, listed in the same order as the upper-case corename[]
   table.  Presumably indexed by the CORE_* value returned by get_coretype() --
   the lookup itself is not in this part of the file, so confirm before
   reordering.  The number in front of each row is the index of its first
   entry. */
static char *corename_lower[] = {
  /*  0 */ "unknown", "80486", "p5", "p6",
  /*  4 */ "katmai", "coppermine", "northwood", "prescott", "banias",
  /*  9 */ "athlon", "opteron", "barcelona",
  /* 12 */ "viac3", "yonah", "core2", "penryn", "dunnington",
  /* 17 */ "nehalem", "atom", "nano", "sandybridge",
  /* 21 */ "bobcat", "bulldozer", "piledriver", "haswell",
  /* 25 */ "steamroller", "excavator", "zen", "skylakex", "dhyana", "cooperlake"
};
1866 
1867 
get_cpunamechar(void)1868 char *get_cpunamechar(void){
1869   return cpuname[get_cpuname()];
1870 }
1871 
get_lower_cpunamechar(void)1872 char *get_lower_cpunamechar(void){
1873   return lowercpuname[get_cpuname()];
1874 }
1875 
1876 
get_coretype(void)1877 int get_coretype(void){
1878 
1879   int family, exfamily, model, exmodel, vendor, stepping;
1880 
1881   if (!have_cpuid()) return CORE_80486;
1882 
1883   family   = get_cputype(GET_FAMILY);
1884   exfamily = get_cputype(GET_EXFAMILY);
1885   model    = get_cputype(GET_MODEL);
1886   exmodel  = get_cputype(GET_EXMODEL);
1887   stepping = get_cputype(GET_STEPPING);
1888 
1889   vendor = get_vendor();
1890 
1891   if (vendor == VENDOR_INTEL){
1892     switch (family) {
1893     case  4:
1894       return CORE_80486;
1895     case  5:
1896       return CORE_P5;
1897     case  6:
1898       switch (exmodel) {
1899       case  0:
1900 	switch (model) {
1901 	case  0:
1902 	case  1:
1903 	case  2:
1904 	case  3:
1905 	case  4:
1906 	case  5:
1907 	case  6:
1908 #if defined(__x86_64__) || defined(__amd64__)
1909 	  return CORE_CORE2;
1910 #else
1911 	  return CORE_P6;
1912 #endif
1913 	case  7:
1914 	  return CORE_KATMAI;
1915 	case  8:
1916 	case 10:
1917 	case 11:
1918 	  return CORE_COPPERMINE;
1919 	case  9:
1920 	case 13:
1921 	case 14:
1922 	  return CORE_BANIAS;
1923 	case 15:
1924 	  return CORE_CORE2;
1925 	}
1926 	break;
1927       case  1:
1928 	switch (model) {
1929 	case  6:
1930 	  return CORE_CORE2;
1931 	case  7:
1932 	  return CORE_PENRYN;
1933 	case 10:
1934 	case 11:
1935 	case 14:
1936 	case 15:
1937 	  return CORE_NEHALEM;
1938 	case 12:
1939 	  return CORE_ATOM;
1940 	case 13:
1941 	  return CORE_DUNNINGTON;
1942 	}
1943 	break;
1944       case  2:
1945 	switch (model) {
1946 	case 5:
1947 	  //Intel Core (Clarkdale) / Core (Arrandale)
1948 	  // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
1949 	  // Xeon (Clarkdale), 32nm
1950 	  return CORE_NEHALEM;
1951 	case 10:
1952           //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
1953 	  if(support_avx())
1954 	    return CORE_SANDYBRIDGE;
1955 	  else
1956 	    return CORE_NEHALEM; //OS doesn't support AVX
1957 	case 12:
1958 	  //Xeon Processor 5600 (Westmere-EP)
1959 	  return CORE_NEHALEM;
1960 	case 13:
1961           //Intel Core i7-3000 / Xeon E5 (Sandy Bridge)
1962 	  if(support_avx())
1963 	    return CORE_SANDYBRIDGE;
1964 	  else
1965 	    return CORE_NEHALEM; //OS doesn't support AVX
1966 	case 14:
1967 	  //Xeon E7540
1968 	case 15:
1969 	  //Xeon Processor E7 (Westmere-EX)
1970 	  return CORE_NEHALEM;
1971 	}
1972 	break;
1973       case 3:
1974 	switch (model) {
1975 	case 7:
1976 	  return CORE_ATOM;
1977 	case 10:
1978 	case 14:
1979 	  if(support_avx())
1980 	    return CORE_SANDYBRIDGE;
1981 	  else
1982 	    return CORE_NEHALEM; //OS doesn't support AVX
1983         case 12:
1984 	case 15:
1985           if(support_avx())
1986 #ifndef NO_AVX2
1987             return CORE_HASWELL;
1988 #else
1989 	    return CORE_SANDYBRIDGE;
1990 #endif
1991           else
1992 	    return CORE_NEHALEM;
1993 	case 13:
1994 	  //broadwell
1995           if(support_avx())
1996 #ifndef NO_AVX2
1997             return CORE_HASWELL;
1998 #else
1999 	    return CORE_SANDYBRIDGE;
2000 #endif
2001           else
2002 	    return CORE_NEHALEM;
2003 	}
2004 	break;
2005       case 4:
2006         switch (model) {
2007         case 5:
2008 	case 6:
2009           if(support_avx())
2010 #ifndef NO_AVX2
2011             return CORE_HASWELL;
2012 #else
2013 	    return CORE_SANDYBRIDGE;
2014 #endif
2015           else
2016 	    return CORE_NEHALEM;
2017 	case 7:
2018 	case 15:
2019 	  //broadwell
2020           if(support_avx())
2021 #ifndef NO_AVX2
2022             return CORE_HASWELL;
2023 #else
2024 	    return CORE_SANDYBRIDGE;
2025 #endif
2026           else
2027 	    return CORE_NEHALEM;
2028 	case 14:
2029 	  //Skylake
2030           if(support_avx())
2031 #ifndef NO_AVX2
2032             return CORE_HASWELL;
2033 #else
2034 	    return CORE_SANDYBRIDGE;
2035 #endif
2036           else
2037 	    return CORE_NEHALEM;
2038 	case 12:
2039 	  // Braswell
2040 	case 13:
2041 	  // Avoton
2042 	    return CORE_NEHALEM;
2043         }
2044         break;
2045       case 10:
2046         switch (model) {
2047 	  case 5: // Comet Lake H and S
2048     	  case 6: // Comet Lake U
2049             if(support_avx())
2050   #ifndef NO_AVX2
2051               return CORE_HASWELL;
2052   #else
2053               return CORE_SANDYBRIDGE;
2054   #endif
2055             else
2056               return CORE_NEHALEM;
2057 	  case 7:// Rocket Lake
2058 #ifndef NO_AVX512
2059 	  if(support_avx512())
2060             return CORE_SKYLAKEX;
2061 #endif
2062 #ifndef NO_AVX2
2063 	  if(support_avx2())
2064             return CORE_HASWELL;
2065 #endif
2066 	  if(support_avx())
2067 	    return CORE_SANDYBRIDGE;
2068 	  else
2069 	  return CORE_NEHALEM;
2070         }
2071       case 5:
2072         switch (model) {
2073 	case 6:
2074 	  //broadwell
2075           if(support_avx())
2076 #ifndef NO_AVX2
2077             return CORE_HASWELL;
2078 #else
2079 	    return CORE_SANDYBRIDGE;
2080 #endif
2081           else
2082 	    return CORE_NEHALEM;
2083 	case 5:
2084 	 // Skylake X
2085 #ifndef NO_AVX512
2086           if(support_avx512_bf16())
2087             return CORE_COOPERLAKE;
2088 	  return CORE_SKYLAKEX;
2089 #else
2090 	  if(support_avx())
2091 #ifndef NO_AVX2
2092 	    return CORE_HASWELL;
2093 #else
2094 	    return CORE_SANDYBRIDGE;
2095 #endif
2096 	  else
2097 	    return CORE_NEHALEM;
2098 #endif
2099 	case 14:
2100 	  // Skylake
2101           if(support_avx())
2102 #ifndef NO_AVX2
2103             return CORE_HASWELL;
2104 #else
2105 	    return CORE_SANDYBRIDGE;
2106 #endif
2107           else
2108 	    return CORE_NEHALEM;
2109 	case 7:
2110 	  // Phi Knights Landing
2111           if(support_avx())
2112 #ifndef NO_AVX2
2113             return CORE_HASWELL;
2114 #else
2115 	    return CORE_SANDYBRIDGE;
2116 #endif
2117           else
2118 	    return CORE_NEHALEM;
2119 	case 12:
2120 	  // Apollo Lake
2121 	    return CORE_NEHALEM;
2122         }
2123 	break;
2124       case 6:
2125         if (model == 6)
2126 #ifndef NO_AVX512
2127 	    return CORE_SKYLAKEX;
2128 #else
2129 	  if(support_avx())
2130 #ifndef NO_AVX2
2131 	    return CORE_HASWELL;
2132 #else
2133 	    return CORE_SANDYBRIDGE;
2134 #endif
2135 	  else
2136 	    return CORE_NEHALEM;
2137 #endif
2138 	if (model == 10)
2139 #ifndef NO_AVX512
2140 	  if(support_avx512_bf16())
2141             return CORE_COOPERLAKE;
2142 	  return CORE_SKYLAKEX;
2143 #else
2144 	  if(support_avx())
2145 #ifndef NO_AVX2
2146 	    return CORE_HASWELL;
2147 #else
2148 	    return CORE_SANDYBRIDGE;
2149 #endif
2150 	  else
2151 	    return CORE_NEHALEM;
2152 #endif
2153         break;
2154       case 7:
2155         if (model == 10)
2156             return CORE_NEHALEM;
2157         if (model == 14)
2158 #ifndef NO_AVX512
2159 	    return CORE_SKYLAKEX;
2160 #else
2161 	  if(support_avx())
2162 #ifndef NO_AVX2
2163 	    return CORE_HASWELL;
2164 #else
2165 	    return CORE_SANDYBRIDGE;
2166 #endif
2167 	  else
2168 	    return CORE_NEHALEM;
2169 #endif
2170         break;
2171       case 9:
2172       case 8:
2173        if (model == 12) { // Tiger Lake
2174           if(support_avx512())
2175             return CORE_SKYLAKEX;
2176           if(support_avx2())
2177             return CORE_HASWELL;
2178           if(support_avx())
2179             return CORE_SANDYBRIDGE;
2180           else
2181           return CORE_NEHALEM;
2182         }
2183         if (model == 14) { // Kaby Lake
2184 	  if(support_avx())
2185 #ifndef NO_AVX2
2186 	    return CORE_HASWELL;
2187 #else
2188 	    return CORE_SANDYBRIDGE;
2189 #endif
2190 	  else
2191             return CORE_NEHALEM;
2192 	}
2193       }
2194       break;
2195 
2196       case 15:
2197 	if (model <= 0x2) return CORE_NORTHWOOD;
2198 	else return CORE_PRESCOTT;
2199     }
2200   }
2201 
2202   if (vendor == VENDOR_AMD){
2203     if (family <= 0x5) return CORE_80486;
2204 #if defined(__x86_64__) || defined(__amd64__)
2205     if (family <= 0xe) return CORE_BARCELONA;
2206 #else
2207     if (family <= 0xe) return CORE_ATHLON;
2208 #endif
2209     if (family == 0xf){
2210       if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
2211       else if (exfamily == 5) return CORE_BOBCAT;
2212       else if (exfamily == 6) {
2213 	switch (model) {
2214 	case 1:
2215 	  //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
2216 	  if(support_avx())
2217 	    return CORE_BULLDOZER;
2218 	  else
2219 	    return CORE_BARCELONA; //OS don't support AVX.
2220 	case 2: //AMD Piledriver
2221 	case 3: //AMD Richland
2222 	  if(support_avx())
2223 	    return CORE_PILEDRIVER;
2224 	  else
2225 	    return CORE_BARCELONA; //OS don't support AVX.
2226         case 5: // New EXCAVATOR
2227 	  if(support_avx())
2228 	    return CORE_EXCAVATOR;
2229 	  else
2230 	    return CORE_BARCELONA; //OS don't support AVX.
2231 	case 0:
2232         case 8:
2233 	  switch(exmodel){
2234 	  case 1: //AMD Trinity
2235 	    if(support_avx())
2236 	      return CORE_PILEDRIVER;
2237 	    else
2238 	      return CORE_BARCELONA; //OS don't support AVX.
2239 
2240 	  case 3:
2241 	    if(support_avx())
2242 	      return CORE_STEAMROLLER;
2243 	    else
2244 	      return CORE_BARCELONA; //OS don't support AVX.
2245 
2246 	  case 6:
2247 	    if(support_avx())
2248 	      return CORE_EXCAVATOR;
2249 	    else
2250 	      return CORE_BARCELONA; //OS don't support AVX.
2251 	  }
2252 	  break;
2253 	}
2254       } else if (exfamily == 8 || exfamily == 10) {
2255 	switch (model) {
2256 	case 1:
2257 	  // AMD Ryzen
2258 	case 8:
2259 	  // Ryzen 2
2260 	default:
2261 	  // Matisse,Renoir Ryzen2 models
2262 	  if(support_avx())
2263 #ifndef NO_AVX2
2264 	    return CORE_ZEN;
2265 #else
2266 	    return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
2267 #endif
2268 	  else
2269 	    return CORE_BARCELONA;
2270 	}
2271       } else {
2272 	return CORE_BARCELONA;
2273       }
2274     }
2275   }
2276 
2277   if (vendor == VENDOR_HYGON){
2278     if (family == 0xf){
2279         if (exfamily == 9) {
2280 	  if(support_avx())
2281 #ifndef NO_AVX2
2282 	    return CORE_ZEN;
2283 #else
2284 	    return CORE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
2285 #endif
2286 	  else
2287 	    return CORE_BARCELONA;
2288 	} else {
2289 		return CORE_BARCELONA;
2290 	}
2291     }
2292   }
2293 
2294   if (vendor == VENDOR_CENTAUR) {
2295     switch (family) {
2296     case 0x6:
2297       if (model == 0xf && stepping < 0xe)
2298         return CORE_NANO;
2299       return CORE_NEHALEM;
2300     default:
2301       if (family >= 0x7)
2302         return CORE_NEHALEM;
2303       else
2304         return CORE_VIAC3;
2305     }
2306   }
2307 
2308   if (vendor == VENDOR_ZHAOXIN) {
2309      return CORE_NEHALEM;
2310   }
2311 
2312   return CORE_UNKNOWN;
2313 }
2314 
get_cpuconfig(void)2315 void get_cpuconfig(void){
2316 
2317   cache_info_t info;
2318   int features;
2319 
2320   printf("#define %s\n", cpuname[get_cpuname()]);
2321 
2322 
2323   if (get_coretype() != CORE_P5) {
2324 
2325     get_cacheinfo(CACHE_INFO_L1_I, &info);
2326     if (info.size > 0) {
2327       printf("#define L1_CODE_SIZE %d\n", info.size * 1024);
2328       printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
2329       printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
2330     }
2331 
2332     get_cacheinfo(CACHE_INFO_L1_D, &info);
2333     if (info.size > 0) {
2334       printf("#define L1_DATA_SIZE %d\n", info.size * 1024);
2335       printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
2336       printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
2337     }
2338 
2339     get_cacheinfo(CACHE_INFO_L2, &info);
2340     if (info.size > 0) {
2341       printf("#define L2_SIZE %d\n", info.size * 1024);
2342       printf("#define L2_ASSOCIATIVE %d\n", info.associative);
2343       printf("#define L2_LINESIZE %d\n", info.linesize);
2344     } else {
2345       //fall back for some virtual machines.
2346       printf("#define L2_SIZE 1048576\n");
2347       printf("#define L2_ASSOCIATIVE 6\n");
2348       printf("#define L2_LINESIZE 64\n");
2349     }
2350 
2351 
2352     get_cacheinfo(CACHE_INFO_L3, &info);
2353     if (info.size > 0) {
2354       printf("#define L3_SIZE %d\n", info.size * 1024);
2355       printf("#define L3_ASSOCIATIVE %d\n", info.associative);
2356       printf("#define L3_LINESIZE %d\n", info.linesize);
2357     }
2358 
2359     get_cacheinfo(CACHE_INFO_L1_ITB, &info);
2360     if (info.size > 0) {
2361       printf("#define ITB_SIZE %d\n", info.size * 1024);
2362       printf("#define ITB_ASSOCIATIVE %d\n", info.associative);
2363       printf("#define ITB_ENTRIES %d\n", info.linesize);
2364     }
2365 
2366     get_cacheinfo(CACHE_INFO_L1_DTB, &info);
2367     if (info.size > 0) {
2368       printf("#define DTB_SIZE %d\n", info.size * 1024);
2369       printf("#define DTB_ASSOCIATIVE %d\n", info.associative);
2370       printf("#define DTB_DEFAULT_ENTRIES %d\n", info.linesize);
2371     } else {
2372       //fall back for some virtual machines.
2373       printf("#define DTB_DEFAULT_ENTRIES 32\n");
2374     }
2375 
2376     features = get_cputype(GET_FEATURE);
2377 
2378     if (features & HAVE_CMOV )   printf("#define HAVE_CMOV\n");
2379     if (features & HAVE_MMX  )   printf("#define HAVE_MMX\n");
2380     if (features & HAVE_SSE  )   printf("#define HAVE_SSE\n");
2381     if (features & HAVE_SSE2 )   printf("#define HAVE_SSE2\n");
2382     if (features & HAVE_SSE3 )   printf("#define HAVE_SSE3\n");
2383     if (features & HAVE_SSSE3)   printf("#define HAVE_SSSE3\n");
2384     if (features & HAVE_SSE4_1)   printf("#define HAVE_SSE4_1\n");
2385     if (features & HAVE_SSE4_2)   printf("#define HAVE_SSE4_2\n");
2386     if (features & HAVE_SSE4A)   printf("#define HAVE_SSE4A\n");
2387     if (features & HAVE_SSE5 )   printf("#define HAVE_SSSE5\n");
2388     if (features & HAVE_AVX )    printf("#define HAVE_AVX\n");
2389     if (features & HAVE_AVX2 )    printf("#define HAVE_AVX2\n");
2390     if (features & HAVE_AVX512VL )    printf("#define HAVE_AVX512VL\n");
2391     if (features & HAVE_AVX512BF16 )    printf("#define HAVE_AVX512BF16\n");
2392     if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
2393     if (features & HAVE_3DNOW)   printf("#define HAVE_3DNOW\n");
2394     if (features & HAVE_FMA4 )    printf("#define HAVE_FMA4\n");
2395     if (features & HAVE_FMA3 )    printf("#define HAVE_FMA3\n");
2396     if (features & HAVE_CFLUSH)  printf("#define HAVE_CFLUSH\n");
2397     if (features & HAVE_HIT)     printf("#define HAVE_HIT 1\n");
2398     if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
2399     if (features & HAVE_128BITFPU)   printf("#define HAVE_128BITFPU\n");
2400     if (features & HAVE_FASTMOVU)    printf("#define HAVE_FASTMOVU\n");
2401 
2402     printf("#define NUM_SHAREDCACHE %d\n", get_cputype(GET_NUMSHARE) + 1);
2403     printf("#define NUM_CORES %d\n", get_cputype(GET_NUMCORES) + 1);
2404 
2405     features = get_coretype();
2406     if (features > 0) printf("#define CORE_%s\n", corename[features]);
2407   } else {
2408     printf("#define DTB_DEFAULT_ENTRIES 16\n");
2409     printf("#define L1_CODE_SIZE 8192\n");
2410     printf("#define L1_DATA_SIZE 8192\n");
2411     printf("#define L2_SIZE 0\n");
2412   }
2413 }
2414 
/*
 * Print the architecture name to stdout without a trailing newline:
 * "X86_64" when __64BIT__ is defined, "X86" otherwise.
 */
void get_architecture(void){
#ifdef __64BIT__
    const char *arch_name = "X86_64";
#else
    const char *arch_name = "X86";
#endif

    printf("%s", arch_name);
}
2422 
/*
 * Print the string returned by get_cpunamechar() to stdout, without a
 * trailing newline.
 */
void get_subarchitecture(void){
    printf("%s", get_cpunamechar());
}
2426 
/*
 * Print the architecture sub-directory name to stdout without a trailing
 * newline: "x86_64" when __64BIT__ is defined, "x86" otherwise.
 */
void get_subdirname(void){
#ifdef __64BIT__
    const char *dir_name = "x86_64";
#else
    const char *dir_name = "x86";
#endif

    printf("%s", dir_name);
}
2434 
get_corename(void)2435 char *get_corename(void){
2436   return corename[get_coretype()];
2437 }
2438 
get_libname(void)2439 void get_libname(void){
2440   printf("%s",   corename_lower[get_coretype()]);
2441 }
2442 
/* This is for the Makefile */
get_sse(void)2444 void get_sse(void){
2445 
2446   int features;
2447 
2448   features = get_cputype(GET_FEATURE);
2449 
2450   if (features & HAVE_MMX  )   printf("HAVE_MMX=1\n");
2451   if (features & HAVE_SSE  )   printf("HAVE_SSE=1\n");
2452   if (features & HAVE_SSE2 )   printf("HAVE_SSE2=1\n");
2453   if (features & HAVE_SSE3 )   printf("HAVE_SSE3=1\n");
2454   if (features & HAVE_SSSE3)   printf("HAVE_SSSE3=1\n");
2455   if (features & HAVE_SSE4_1)   printf("HAVE_SSE4_1=1\n");
2456   if (features & HAVE_SSE4_2)   printf("HAVE_SSE4_2=1\n");
2457   if (features & HAVE_SSE4A)   printf("HAVE_SSE4A=1\n");
2458   if (features & HAVE_SSE5 )   printf("HAVE_SSSE5=1\n");
2459   if (features & HAVE_AVX )    printf("HAVE_AVX=1\n");
2460   if (features & HAVE_AVX2 )    printf("HAVE_AVX2=1\n");
2461   if (features & HAVE_AVX512VL )    printf("HAVE_AVX512VL=1\n");
2462   if (features & HAVE_AVX512BF16 )    printf("HAVE_AVX512BF16=1\n");
2463   if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
2464   if (features & HAVE_3DNOW)   printf("HAVE_3DNOW=1\n");
2465   if (features & HAVE_FMA4 )    printf("HAVE_FMA4=1\n");
2466   if (features & HAVE_FMA3 )    printf("HAVE_FMA3=1\n");
2467 
2468 }
2469