1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2015, The University of Texas at Austin
8 Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are
12 met:
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15 - Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18 - Neither the name(s) of the copyright holder(s) nor the names of its
19 contributors may be used to endorse or promote products derived
20 from this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 */
35
36 #include <stdio.h>
37 #include <string.h>
38
/* CPU vendor identifiers, as returned by get_vendor(). */
#define VENDOR_UNKNOWN 0
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
42
/*
 * Identifiers for the CPU families this tool can report. Each value is
 * also used as an index into cpuname[] below.
 */
#define CPUNAME_GENERIC 0
#define CPUNAME_PENRYN 1
#define CPUNAME_SANDYBRIDGE 2
#define CPUNAME_HASWELL 3
#define CPUNAME_KNC 4
#define CPUNAME_KNL 5
#define CPUNAME_BULLDOZER 6
#define CPUNAME_PILEDRIVER 7
#define CPUNAME_STEAMROLLER 8
#define CPUNAME_EXCAVATOR 9
#define CPUNAME_ZEN 10

/*
 * Name printed for each CPUNAME_* identifier. Designated initializers bind
 * every string to its identifier explicitly, so the table cannot silently
 * drift out of step if the identifiers are renumbered or reordered. The
 * table and the strings it points to are read-only.
 */
static const char *const cpuname[] = {
    [CPUNAME_GENERIC]     = "generic",
    [CPUNAME_PENRYN]      = "penryn",
    [CPUNAME_SANDYBRIDGE] = "sandybridge",
    [CPUNAME_HASWELL]     = "haswell",
    [CPUNAME_KNC]         = "knc",
    [CPUNAME_KNL]         = "knl",
    [CPUNAME_BULLDOZER]   = "bulldozer",
    [CPUNAME_PILEDRIVER]  = "piledriver",
    [CPUNAME_STEAMROLLER] = "steamroller",
    [CPUNAME_EXCAVATOR]   = "excavator",
    [CPUNAME_ZEN]         = "zen",
};
68
/* Extract a bit field: shift `value` right by `shift` bits, then AND with `mask`. */
#define BITMASK(value, shift, mask) (((value) >> (shift)) & (mask))
70
/*
 * Execute the CPUID instruction for leaf `op` and store the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 *
 * Only EAX is loaded before the instruction; no sub-leaf value is placed in
 * ECX, so the ECX input is whatever the register happens to hold.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
    int reg_a, reg_b, reg_c, reg_d;

#if defined(__i386__) && defined(__PIC__)
    /*
     * 32-bit position-independent build: EBX is saved in EDI before CPUID
     * and swapped back afterwards, so EBX keeps its original value and the
     * EBX result is delivered via EDI. (Presumably because EBX holds the
     * GOT pointer in this mode -- confirm against the ABI.)
     */
    __asm__ __volatile__(
        "mov %%ebx, %%edi;"
        "cpuid;"
        "xchgl %%ebx, %%edi;"
        : "=a" (reg_a), "=D" (reg_b), "=c" (reg_c), "=d" (reg_d)
        : "a" (op)
        : "cc");
#else
    __asm__ __volatile__(
        "cpuid"
        : "=a" (reg_a), "=b" (reg_b), "=c" (reg_c), "=d" (reg_d)
        : "a" (op)
        : "cc");
#endif

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
}
83
/*
 * Query CPUID leaf 0 and return the value it leaves in EAX. Callers treat a
 * zero result as "no usable CPUID information". Note that this does not
 * probe whether the CPUID instruction itself exists; it simply executes it.
 */
static inline int have_cpuid(void)
{
    int leaf0_eax;
    int unused_ebx, unused_ecx, unused_edx;

    cpuid(0, &leaf0_eax, &unused_ebx, &unused_ecx, &unused_edx);

    return leaf0_eax;
}
90
91
get_vendor(void)92 int get_vendor(void){
93 int eax, ebx, ecx, edx;
94 char vendor[13];
95
96 cpuid(0, &eax, &ebx, &ecx, &edx);
97
98 *(int *)(&vendor[0]) = ebx;
99 *(int *)(&vendor[4]) = edx;
100 *(int *)(&vendor[8]) = ecx;
101 vendor[12] = (char)0;
102
103 if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
104 if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
105
106 if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
107
108 return VENDOR_UNKNOWN;
109 }
110
111
/*
 * Execute XGETBV with `op` in ECX and store the resulting EAX and EDX
 * register values through `eax` and `edx`.
 */
static inline void xgetbv(int op, int *eax, int *edx)
{
    int low_half, high_half;

    /*
     * The instruction is emitted as raw bytes (0x0f 0x01 0xd0) instead of a
     * mnemonic, so the assembler does not need to recognize it.
     */
    __asm__ __volatile__(
        ".byte 0x0f, 0x01, 0xd0"
        : "=a" (low_half), "=d" (high_half)
        : "c" (op)
        : "cc");

    *eax = low_half;
    *edx = high_half;
}
117
/*
 * Report whether AVX can be used: returns 1 if it can, 0 otherwise.
 *
 * Two conditions must hold:
 *   - bits 26, 27 and 28 of CPUID leaf 1 ECX are all set (XSAVE, OSXSAVE
 *     and AVX per the Intel SDM), and
 *   - bits 1 and 2 of the XGETBV(0) low word are both set, i.e. the OS has
 *     enabled saving of the SSE and AVX register state.
 */
int support_avx()
{
    const int cpu_bits = (1 << 28) | (1 << 27) | (1 << 26);
    const int os_bits = 6;
    int eax, ebx, ecx, edx;

    cpuid(1, &eax, &ebx, &ecx, &edx);
    if ((ecx & cpu_bits) != cpu_bits) {
        return 0;
    }

    /* Only safe to issue XGETBV once the CPUID bits above are confirmed. */
    xgetbv(0, &eax, &edx);
    if ((eax & os_bits) != os_bits) {
        return 0; /* OS does not support AVX */
    }

    return 1;
}
131
/*
 * Report whether the OS has enabled the register state needed for AVX-512:
 * returns 1 if so, 0 otherwise.
 *
 * The CPUID leaf 1 ECX bits 26, 27 and 28 must all be set, and the
 * XGETBV(0) low word must have every bit of 0xE6 set (bits 1, 2, 5, 6, 7).
 * No AVX-512 CPUID feature flag is queried here; only the leaf 1 bits and
 * the XGETBV state bits are checked.
 */
int support_avx512()
{
    const int cpu_bits = (1 << 28) | (1 << 27) | (1 << 26);
    const int os_bits = 0xE6;
    int eax, ebx, ecx, edx;

    cpuid(1, &eax, &ebx, &ecx, &edx);
    if ((ecx & cpu_bits) != cpu_bits) {
        return 0;
    }

    /* Only safe to issue XGETBV once the CPUID bits above are confirmed. */
    xgetbv(0, &eax, &edx);
    if ((eax & os_bits) != os_bits) {
        return 0; /* OS does not support AVX-512 */
    }

    return 1;
}
145
cpu_detect()146 int cpu_detect()
147 {
148 int eax, ebx, ecx, edx;
149 int vendor, family, extend_family, model, extend_model;
150
151 if ( !have_cpuid() ) return CPUNAME_GENERIC;
152
153 vendor = get_vendor();
154
155 cpuid( 1, &eax, &ebx, &ecx, &edx );
156
157 extend_family = BITMASK( eax, 20, 0xff );
158 extend_model = BITMASK( eax, 16, 0x0f );
159 family = BITMASK( eax, 8, 0x0f );
160 model = BITMASK( eax, 4, 0x0f );
161
162 if (vendor == VENDOR_INTEL){
163 model |= extend_model<<4;
164 switch (family) {
165 case 0x6:
166 switch (model) {
167 case 0x0F: //Core2
168 case 0x16: //Core2
169 case 0x17: //Penryn
170 case 0x1D: //Penryn
171 case 0x1A: //Nehalem
172 case 0x1E: //Nehalem
173 case 0x2E: //Nehalem
174 case 0x25: //Westmere
175 case 0x2C: //Westmere
176 case 0x2F: //Westmere
177 return CPUNAME_PENRYN;
178 case 0x2A: //Sandy Bridge
179 case 0x2D: //Sandy Bridge
180 case 0x3A: //Ivy Bridge
181 case 0x3E: //Ivy Bridge
182 if(support_avx()) {
183 return CPUNAME_SANDYBRIDGE;
184 }else{
185 return CPUNAME_GENERIC; //OS doesn't support AVX
186 }
187 case 0x3C: //Haswell
188 case 0x3F: //Haswell
189 case 0x3D: //Broadwell
190 case 0x47: //Broadwell
191 case 0x4F: //Broadwell
192 case 0x56: //Broadwell
193 case 0x4E: //Skylake
194 case 0x5E: //Skylake
195 if(support_avx()) {
196 return CPUNAME_HASWELL;
197 }else{
198 return CPUNAME_GENERIC; //OS doesn't support AVX
199 }
200 case 0x57: //KNL
201 if(support_avx512()) {
202 return CPUNAME_KNL;
203 }else{
204 return CPUNAME_GENERIC; //OS doesn't support AVX
205 }
206 }
207 break;
208 case 0xB:
209 switch (model) {
210 case 0x01: //KNC
211 return CPUNAME_KNC;
212 }
213 }
214 }else if (vendor == VENDOR_AMD){
215 switch (family) {
216 case 0xf:
217 switch (extend_family) {
218 case 6:
219 switch (model) {
220 case 1:
221 if(support_avx())
222 return CPUNAME_BULLDOZER;
223 else
224 return CPUNAME_GENERIC; //OS don't support AVX.
225 case 2:
226 if(support_avx())
227 return CPUNAME_PILEDRIVER;
228 else
229 return CPUNAME_GENERIC; //OS don't support AVX.
230 case 0:
231 // Steamroller. Temp use Piledriver.
232 if(support_avx())
233 return CPUNAME_STEAMROLLER;
234 else
235 return CPUNAME_GENERIC; //OS don't support AVX.
236 }
237 case 8:
238 switch (model){
239 case 1:
240 if(support_avx())
241 return CPUNAME_ZEN;
242 else
243 return CPUNAME_REFERENCE; //OS don't support AVX.
244 }
245 }
246 break;
247 }
248 }
249
250 return CPUNAME_GENERIC;
251 }
252
253
main()254 int main()
255 {
256 int cpuname_id;
257
258 cpuname_id=cpu_detect();
259
260 printf("%s\n", cpuname[cpuname_id]);
261 return 0;
262 }
263