1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2015, The University of Texas at Austin
8    Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
9 
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are
12    met:
13     - Redistributions of source code must retain the above copyright
14       notice, this list of conditions and the following disclaimer.
15     - Redistributions in binary form must reproduce the above copyright
16       notice, this list of conditions and the following disclaimer in the
17       documentation and/or other materials provided with the distribution.
18     - Neither the name(s) of the copyright holder(s) nor the names of its
19       contributors may be used to endorse or promote products derived
20       from this software without specific prior written permission.
21 
22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #include <stdio.h>
37 #include <string.h>
38 
/* CPU vendor identifiers, as returned by get_vendor(). */
#define VENDOR_UNKNOWN       0
#define VENDOR_INTEL         1
#define VENDOR_AMD           2

/*
 * CPU (sub-configuration) identifiers, as returned by cpu_detect().
 * main() uses the value directly as an index into the cpuname[] table,
 * so these values must stay in step with the order of that table.
 */
#define CPUNAME_GENERIC      0
#define CPUNAME_PENRYN       1
#define CPUNAME_SANDYBRIDGE  2
#define CPUNAME_HASWELL      3
#define CPUNAME_KNC          4
#define CPUNAME_KNL          5
#define CPUNAME_BULLDOZER    6
#define CPUNAME_PILEDRIVER   7
#define CPUNAME_STEAMROLLER  8
#define CPUNAME_EXCAVATOR    9
#define CPUNAME_ZEN         10
54 
/*
 * Printable configuration names, indexed by the CPUNAME_* value that
 * cpu_detect() returns. The entries are string literals and the table is
 * never modified, so both the strings and the pointers are const.
 */
static const char *const cpuname[] = {
  "generic",
  "penryn",
  "sandybridge",
  "haswell",
  "knc",
  "knl",
  "bulldozer",
  "piledriver",
  "steamroller",
  "excavator",
  "zen",
};
68 
/* Extract a bit field: shift `a` right by `b` bits, then AND with mask `c`. */
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
70 
/*
 * Execute the CPUID instruction with EAX = op and store the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 *
 * Only EAX is supplied as an input; ECX is not initialised, so this wrapper
 * does not select a sub-leaf.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
  /*
   * 32-bit PIC build: EBX is not listed as an output here. Its original
   * value is parked in EDI before CPUID and swapped back afterwards, which
   * leaves CPUID's EBX result in EDI ("=D") and EBX itself restored.
   * NOTE(review): presumably because EBX is reserved as the PIC/GOT base
   * register on i386 -- confirm against the toolchain in use.
   */
  __asm__ __volatile__
    ("mov %%ebx, %%edi;"
     "cpuid;"
     "xchgl %%ebx, %%edi;"
     : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#else
  /* Otherwise EBX can be bound directly as an output register. */
  __asm__ __volatile__
    ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
83 
/*
 * Query CPUID leaf 0 and return the value it leaves in EAX.
 *
 * cpu_detect() treats the result as a boolean: zero means "fall back to the
 * generic configuration". Note that this does not probe whether the CPUID
 * instruction itself exists; it simply executes it.
 */
static inline int have_cpuid(void)
{
  int leaf0_eax;
  int unused_ebx, unused_ecx, unused_edx;

  cpuid(0, &leaf0_eax, &unused_ebx, &unused_ecx, &unused_edx);

  return leaf0_eax;
}
90 
91 
get_vendor(void)92 int get_vendor(void){
93   int eax, ebx, ecx, edx;
94   char vendor[13];
95 
96   cpuid(0, &eax, &ebx, &ecx, &edx);
97 
98   *(int *)(&vendor[0]) = ebx;
99   *(int *)(&vendor[4]) = edx;
100   *(int *)(&vendor[8]) = ecx;
101   vendor[12] = (char)0;
102 
103   if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
104   if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
105 
106   if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
107 
108   return VENDOR_UNKNOWN;
109 }
110 
111 
/*
 * Execute XGETBV with ECX = op and store the resulting EAX and EDX register
 * values through the output pointers.
 *
 * Callers in this file only invoke it after checking the CPUID leaf 1 ECX
 * feature bits (see support_avx() and support_avx512()).
 */
static inline void xgetbv(int op, int * eax, int * edx){
  // The instruction is emitted as raw opcode bytes (0F 01 D0) rather than
  // by mnemonic. NOTE(review): presumably so that assemblers which do not
  // know the xgetbv mnemonic can still build this file.
  __asm__ __volatile__
    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
}
117 
/*
 * Report whether AVX can be used on this machine.
 *
 * Two conditions must hold:
 *   1. CPUID leaf 1 reports ECX bits 26, 27 and 28 all set (per the Intel
 *      SDM: XSAVE, OSXSAVE and AVX respectively).
 *   2. XGETBV(0) reports bits 1 and 2 set in EAX, i.e. the OS has enabled
 *      the corresponding register state.
 *
 * Returns 1 when both hold, 0 otherwise.
 */
int support_avx(){
  const int required_ecx = (1 << 26) | (1 << 27) | (1 << 28);
  const int required_xcr0 = 6;
  int reg_a, reg_b, reg_c, reg_d;

  cpuid(1, &reg_a, &reg_b, &reg_c, &reg_d);

  /* XGETBV must not be issued unless the CPUID feature bits are present. */
  if ((reg_c & required_ecx) != required_ecx)
    return 0;

  xgetbv(0, &reg_a, &reg_d);

  return ((reg_a & required_xcr0) == required_xcr0) ? 1 : 0;
}
131 
/*
 * Report whether the OS has enabled the register state needed for AVX-512.
 *
 * Two conditions must hold:
 *   1. CPUID leaf 1 reports ECX bits 26, 27 and 28 all set (the same
 *      precondition support_avx() uses).
 *   2. XGETBV(0) reports all bits of the mask 0xE6 set in EAX (bits 1, 2,
 *      5, 6 and 7).
 *
 * This routine does not itself query a CPUID AVX-512 feature flag; in this
 * file it is only called for a CPU model already identified as KNL.
 *
 * Returns 1 when both hold, 0 otherwise.
 */
int support_avx512(){
  const int required_ecx = (1 << 26) | (1 << 27) | (1 << 28);
  const int required_xcr0 = 0xE6;
  int reg_a, reg_b, reg_c, reg_d;

  cpuid(1, &reg_a, &reg_b, &reg_c, &reg_d);

  /* XGETBV must not be issued unless the CPUID feature bits are present. */
  if ((reg_c & required_ecx) != required_ecx)
    return 0;

  xgetbv(0, &reg_a, &reg_d);

  return ((reg_a & required_xcr0) == required_xcr0) ? 1 : 0;
}
145 
cpu_detect()146 int cpu_detect()
147 {
148   int eax, ebx, ecx, edx;
149   int vendor, family, extend_family, model, extend_model;
150 
151   if ( !have_cpuid() ) return CPUNAME_GENERIC;
152 
153   vendor = get_vendor();
154 
155   cpuid( 1, &eax, &ebx, &ecx, &edx );
156 
157   extend_family = BITMASK( eax, 20, 0xff );
158   extend_model  = BITMASK( eax, 16, 0x0f );
159   family        = BITMASK( eax,  8, 0x0f );
160   model         = BITMASK( eax,  4, 0x0f );
161 
162   if (vendor == VENDOR_INTEL){
163     model |= extend_model<<4;
164     switch (family) {
165     case 0x6:
166       switch (model) {
167         case 0x0F: //Core2
168         case 0x16: //Core2
169         case 0x17: //Penryn
170         case 0x1D: //Penryn
171         case 0x1A: //Nehalem
172         case 0x1E: //Nehalem
173         case 0x2E: //Nehalem
174         case 0x25: //Westmere
175         case 0x2C: //Westmere
176         case 0x2F: //Westmere
177           return CPUNAME_PENRYN;
178         case 0x2A: //Sandy Bridge
179         case 0x2D: //Sandy Bridge
180         case 0x3A: //Ivy Bridge
181         case 0x3E: //Ivy Bridge
182           if(support_avx()) {
183             return CPUNAME_SANDYBRIDGE;
184           }else{
185             return CPUNAME_GENERIC; //OS doesn't support AVX
186           }
187         case 0x3C: //Haswell
188         case 0x3F: //Haswell
189         case 0x3D: //Broadwell
190         case 0x47: //Broadwell
191         case 0x4F: //Broadwell
192         case 0x56: //Broadwell
193         case 0x4E: //Skylake
194         case 0x5E: //Skylake
195           if(support_avx()) {
196             return CPUNAME_HASWELL;
197           }else{
198             return CPUNAME_GENERIC; //OS doesn't support AVX
199           }
200         case 0x57: //KNL
201           if(support_avx512()) {
202             return CPUNAME_KNL;
203           }else{
204             return CPUNAME_GENERIC; //OS doesn't support AVX
205           }
206       }
207       break;
208     case 0xB:
209       switch (model) {
210         case 0x01: //KNC
211           return CPUNAME_KNC;
212       }
213     }
214   }else if (vendor == VENDOR_AMD){
215     switch (family) {
216     case 0xf:
217       switch (extend_family) {
218       case 6:
219         switch (model) {
220         case 1:
221           if(support_avx())
222             return CPUNAME_BULLDOZER;
223           else
224             return CPUNAME_GENERIC; //OS don't support AVX.
225         case 2:
226           if(support_avx())
227             return CPUNAME_PILEDRIVER;
228           else
229             return CPUNAME_GENERIC; //OS don't support AVX.
230         case 0:
231           // Steamroller. Temp use Piledriver.
232           if(support_avx())
233             return CPUNAME_STEAMROLLER;
234           else
235             return CPUNAME_GENERIC; //OS don't support AVX.
236         }
237       case 8:
238 	switch (model){
239 	case 1:
240           if(support_avx())
241 	    return CPUNAME_ZEN;
242           else
243             return CPUNAME_REFERENCE; //OS don't support AVX.
244 	}
245       }
246       break;
247     }
248   }
249 
250   return CPUNAME_GENERIC;
251 }
252 
253 
main()254 int main()
255 {
256   int cpuname_id;
257 
258   cpuname_id=cpu_detect();
259 
260   printf("%s\n", cpuname[cpuname_id]);
261   return 0;
262 }
263