1 /* Get CPU type and Features for x86 processors.
2 Copyright (C) 2012-2016 Free Software Foundation, Inc.
3 Contributed by Sriraman Tallam (tmsriram@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #include "cpuid.h"
27 #include "tsystem.h"
28 #include "auto-target.h"
29
/* Optional priority argument for the constructor attribute below.  It
   expands to (101) when HAVE_INIT_PRIORITY is defined and to nothing
   otherwise, in which case a plain constructor attribute results.  */
#if defined (HAVE_INIT_PRIORITY)
# define CONSTRUCTOR_PRIORITY (101)
#else
# define CONSTRUCTOR_PRIORITY
#endif

/* Forward declaration of the CPU-detection constructor.  */
int __cpu_indicator_init (void)
  __attribute__ ((constructor CONSTRUCTOR_PRIORITY));
38
/* Processor vendors.  Numbering starts at one, so zero is never a valid
   vendor value.  VENDOR_MAX is one past the last valid vendor.  */

enum processor_vendor
{
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4
};
48
/* Processor types.  The numeric values are spelled out because they
   must stay stable: any new type has to be added at the end, just
   before CPU_TYPE_MAX.  */

enum processor_types
{
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  CPU_TYPE_MAX = 11
};
65
/* Processor subtypes.  The numeric values are spelled out because they
   must stay stable: any new subtype has to be added at the end, just
   before CPU_SUBTYPE_MAX.  */
enum processor_subtypes
{
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  CPU_SUBTYPE_MAX = 17
};
86
/* ISA features supported.  Each enumerator is a bit index into the
   feature mask.  The values are spelled out because they must stay
   stable: new features have to be added at the end.  */

enum processor_features
{
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27
};
120
/* Record describing the running processor.  The single object
   __cpu_model starts out all zero.  */
struct __processor_model
{
  /* Vendor code; zero until detection has stored a value.  */
  unsigned int __cpu_vendor;
  /* Processor type code, or zero when none was recognized.  */
  unsigned int __cpu_type;
  /* Processor subtype code, or zero when none was recognized.  */
  unsigned int __cpu_subtype;
  /* Bit mask of detected ISA features.  */
  unsigned int __cpu_features[1];
} __cpu_model = { };
128
129
130 /* Get the specific type of AMD CPU. */
131
132 static void
get_amd_cpu(unsigned int family,unsigned int model)133 get_amd_cpu (unsigned int family, unsigned int model)
134 {
135 switch (family)
136 {
137 /* AMD Family 10h. */
138 case 0x10:
139 __cpu_model.__cpu_type = AMDFAM10H;
140 switch (model)
141 {
142 case 0x2:
143 /* Barcelona. */
144 __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
145 break;
146 case 0x4:
147 /* Shanghai. */
148 __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
149 break;
150 case 0x8:
151 /* Istanbul. */
152 __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
153 break;
154 default:
155 break;
156 }
157 break;
158 /* AMD Family 14h "btver1". */
159 case 0x14:
160 __cpu_model.__cpu_type = AMD_BTVER1;
161 break;
162 /* AMD Family 15h "Bulldozer". */
163 case 0x15:
164 __cpu_model.__cpu_type = AMDFAM15H;
165 /* Bulldozer version 1. */
166 if ( model <= 0xf)
167 __cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
168 /* Bulldozer version 2 "Piledriver" */
169 if (model >= 0x10 && model <= 0x2f)
170 __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
171 /* Bulldozer version 3 "Steamroller" */
172 if (model >= 0x30 && model <= 0x4f)
173 __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
174 /* Bulldozer version 4 "Excavator" */
175 if (model >= 0x60 && model <= 0x7f)
176 __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
177 break;
178 /* AMD Family 16h "btver2" */
179 case 0x16:
180 __cpu_model.__cpu_type = AMD_BTVER2;
181 break;
182 case 0x17:
183 __cpu_model.__cpu_type = AMDFAM17H;
184 /* AMD family 17h version 1. */
185 if (model <= 0x1f)
186 __cpu_model.__cpu_subtype = AMDFAM17H_ZNVER1;
187 break;
188 default:
189 break;
190 }
191 }
192
193 /* Get the specific type of Intel CPU. */
194
195 static void
get_intel_cpu(unsigned int family,unsigned int model,unsigned int brand_id)196 get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
197 {
198 /* Parse family and model only if brand ID is 0. */
199 if (brand_id == 0)
200 {
201 switch (family)
202 {
203 case 0x5:
204 /* Pentium. */
205 break;
206 case 0x6:
207 switch (model)
208 {
209 case 0x1c:
210 case 0x26:
211 /* Bonnell. */
212 __cpu_model.__cpu_type = INTEL_BONNELL;
213 break;
214 case 0x37:
215 case 0x4a:
216 case 0x4d:
217 case 0x5a:
218 case 0x5d:
219 /* Silvermont. */
220 __cpu_model.__cpu_type = INTEL_SILVERMONT;
221 break;
222 case 0x57:
223 /* Knights Landing. */
224 __cpu_model.__cpu_type = INTEL_KNL;
225 break;
226 case 0x1a:
227 case 0x1e:
228 case 0x1f:
229 case 0x2e:
230 /* Nehalem. */
231 __cpu_model.__cpu_type = INTEL_COREI7;
232 __cpu_model.__cpu_subtype = INTEL_COREI7_NEHALEM;
233 break;
234 case 0x25:
235 case 0x2c:
236 case 0x2f:
237 /* Westmere. */
238 __cpu_model.__cpu_type = INTEL_COREI7;
239 __cpu_model.__cpu_subtype = INTEL_COREI7_WESTMERE;
240 break;
241 case 0x2a:
242 case 0x2d:
243 /* Sandy Bridge. */
244 __cpu_model.__cpu_type = INTEL_COREI7;
245 __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
246 break;
247 case 0x3a:
248 case 0x3e:
249 /* Ivy Bridge. */
250 __cpu_model.__cpu_type = INTEL_COREI7;
251 __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
252 break;
253 case 0x3c:
254 case 0x3f:
255 case 0x45:
256 case 0x46:
257 /* Haswell. */
258 __cpu_model.__cpu_type = INTEL_COREI7;
259 __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
260 break;
261 case 0x3d:
262 case 0x47:
263 case 0x4f:
264 case 0x56:
265 /* Broadwell. */
266 __cpu_model.__cpu_type = INTEL_COREI7;
267 __cpu_model.__cpu_subtype = INTEL_COREI7_BROADWELL;
268 break;
269 case 0x4e:
270 case 0x5e:
271 /* Skylake. */
272 __cpu_model.__cpu_type = INTEL_COREI7;
273 __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE;
274 break;
275 case 0x55:
276 /* Skylake with AVX-512 support. */
277 __cpu_model.__cpu_type = INTEL_COREI7;
278 __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
279 break;
280 case 0x17:
281 case 0x1d:
282 /* Penryn. */
283 case 0x0f:
284 /* Merom. */
285 __cpu_model.__cpu_type = INTEL_CORE2;
286 break;
287 default:
288 break;
289 }
290 break;
291 default:
292 /* We have no idea. */
293 break;
294 }
295 }
296 }
297
298 /* ECX and EDX are output of CPUID at level one. MAX_CPUID_LEVEL is
299 the max possible level of CPUID insn. */
300 static void
get_available_features(unsigned int ecx,unsigned int edx,int max_cpuid_level)301 get_available_features (unsigned int ecx, unsigned int edx,
302 int max_cpuid_level)
303 {
304 unsigned int features = 0;
305
306 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
307 #define XCR_XFEATURE_ENABLED_MASK 0x0
308 #define XSTATE_FP 0x1
309 #define XSTATE_SSE 0x2
310 #define XSTATE_YMM 0x4
311 #define XSTATE_OPMASK 0x20
312 #define XSTATE_ZMM 0x40
313 #define XSTATE_HI_ZMM 0x80
314
315 #define XCR_AVX_ENABLED_MASK \
316 (XSTATE_SSE | XSTATE_YMM)
317 #define XCR_AVX512F_ENABLED_MASK \
318 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
319
320 /* Check if AVX and AVX512 are usable. */
321 int avx_usable = 0;
322 int avx512_usable = 0;
323 if ((ecx & bit_OSXSAVE))
324 {
325 /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
326 ZMM16-ZMM31 states are supported by OSXSAVE. */
327 unsigned int xcrlow;
328 unsigned int xcrhigh;
329 asm (".byte 0x0f, 0x01, 0xd0"
330 : "=a" (xcrlow), "=d" (xcrhigh)
331 : "c" (XCR_XFEATURE_ENABLED_MASK));
332 if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
333 {
334 avx_usable = 1;
335 avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK)
336 == XCR_AVX512F_ENABLED_MASK);
337 }
338 }
339
340 if (edx & bit_CMOV)
341 features |= (1 << FEATURE_CMOV);
342 if (edx & bit_MMX)
343 features |= (1 << FEATURE_MMX);
344 if (edx & bit_SSE)
345 features |= (1 << FEATURE_SSE);
346 if (edx & bit_SSE2)
347 features |= (1 << FEATURE_SSE2);
348 if (ecx & bit_POPCNT)
349 features |= (1 << FEATURE_POPCNT);
350 if (ecx & bit_AES)
351 features |= (1 << FEATURE_AES);
352 if (ecx & bit_PCLMUL)
353 features |= (1 << FEATURE_PCLMUL);
354 if (ecx & bit_SSE3)
355 features |= (1 << FEATURE_SSE3);
356 if (ecx & bit_SSSE3)
357 features |= (1 << FEATURE_SSSE3);
358 if (ecx & bit_SSE4_1)
359 features |= (1 << FEATURE_SSE4_1);
360 if (ecx & bit_SSE4_2)
361 features |= (1 << FEATURE_SSE4_2);
362 if (avx_usable)
363 {
364 if (ecx & bit_AVX)
365 features |= (1 << FEATURE_AVX);
366 if (ecx & bit_FMA)
367 features |= (1 << FEATURE_FMA);
368 }
369
370 /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
371 if (max_cpuid_level >= 7)
372 {
373 unsigned int eax, ebx, ecx, edx;
374 __cpuid_count (7, 0, eax, ebx, ecx, edx);
375 if (ebx & bit_BMI)
376 features |= (1 << FEATURE_BMI);
377 if (avx_usable)
378 {
379 if (ebx & bit_AVX2)
380 features |= (1 << FEATURE_AVX2);
381 }
382 if (ebx & bit_BMI2)
383 features |= (1 << FEATURE_BMI2);
384 if (avx512_usable)
385 {
386 if (ebx & bit_AVX512F)
387 features |= (1 << FEATURE_AVX512F);
388 if (ebx & bit_AVX512VL)
389 features |= (1 << FEATURE_AVX512VL);
390 if (ebx & bit_AVX512BW)
391 features |= (1 << FEATURE_AVX512BW);
392 if (ebx & bit_AVX512DQ)
393 features |= (1 << FEATURE_AVX512DQ);
394 if (ebx & bit_AVX512CD)
395 features |= (1 << FEATURE_AVX512CD);
396 if (ebx & bit_AVX512PF)
397 features |= (1 << FEATURE_AVX512PF);
398 if (ebx & bit_AVX512ER)
399 features |= (1 << FEATURE_AVX512ER);
400 if (ebx & bit_AVX512IFMA)
401 features |= (1 << FEATURE_AVX512IFMA);
402 if (ecx & bit_AVX512VBMI)
403 features |= (1 << FEATURE_AVX512VBMI);
404 }
405 }
406
407 unsigned int ext_level;
408 unsigned int eax, ebx;
409 /* Check cpuid level of extended features. */
410 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
411
412 if (ext_level > 0x80000000)
413 {
414 __cpuid (0x80000001, eax, ebx, ecx, edx);
415
416 if (ecx & bit_SSE4a)
417 features |= (1 << FEATURE_SSE4_A);
418 if (avx_usable)
419 {
420 if (ecx & bit_FMA4)
421 features |= (1 << FEATURE_FMA4);
422 if (ecx & bit_XOP)
423 features |= (1 << FEATURE_XOP);
424 }
425 }
426
427 __cpu_model.__cpu_features[0] = features;
428 }
429
/* Out-of-line wrapper around __get_cpuid that passes all arguments
   through and returns its result unchanged.  It is marked noinline
   because having many calls to cpuid in one function in 32-bit mode
   causes GCC to complain:
   "can't find a register in class CLOBBERED_REGS".  This is
   related to PR rtl-optimization 44174.  */

static int __attribute__ ((noinline))
__get_cpuid_output (unsigned int __level,
                    unsigned int *__eax, unsigned int *__ebx,
                    unsigned int *__ecx, unsigned int *__edx)
{
  int result = __get_cpuid (__level, __eax, __ebx, __ecx, __edx);

  return result;
}
442
443
444 /* A constructor function that is sets __cpu_model and __cpu_features with
445 the right values. This needs to run only once. This constructor is
446 given the highest priority and it should run before constructors without
447 the priority set. However, it still runs after ifunc initializers and
448 needs to be called explicitly there. */
449
450 int __attribute__ ((constructor CONSTRUCTOR_PRIORITY))
__cpu_indicator_init(void)451 __cpu_indicator_init (void)
452 {
453 unsigned int eax, ebx, ecx, edx;
454
455 int max_level = 5;
456 unsigned int vendor;
457 unsigned int model, family, brand_id;
458 unsigned int extended_model, extended_family;
459
460 /* This function needs to run just once. */
461 if (__cpu_model.__cpu_vendor)
462 return 0;
463
464 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
465 if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
466 {
467 __cpu_model.__cpu_vendor = VENDOR_OTHER;
468 return -1;
469 }
470
471 vendor = ebx;
472 max_level = eax;
473
474 if (max_level < 1)
475 {
476 __cpu_model.__cpu_vendor = VENDOR_OTHER;
477 return -1;
478 }
479
480 if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
481 {
482 __cpu_model.__cpu_vendor = VENDOR_OTHER;
483 return -1;
484 }
485
486 model = (eax >> 4) & 0x0f;
487 family = (eax >> 8) & 0x0f;
488 brand_id = ebx & 0xff;
489 extended_model = (eax >> 12) & 0xf0;
490 extended_family = (eax >> 20) & 0xff;
491
492 if (vendor == signature_INTEL_ebx)
493 {
494 /* Adjust model and family for Intel CPUS. */
495 if (family == 0x0f)
496 {
497 family += extended_family;
498 model += extended_model;
499 }
500 else if (family == 0x06)
501 model += extended_model;
502
503 /* Get CPU type. */
504 get_intel_cpu (family, model, brand_id);
505 /* Find available features. */
506 get_available_features (ecx, edx, max_level);
507 __cpu_model.__cpu_vendor = VENDOR_INTEL;
508 }
509 else if (vendor == signature_AMD_ebx)
510 {
511 /* Adjust model and family for AMD CPUS. */
512 if (family == 0x0f)
513 {
514 family += extended_family;
515 model += extended_model;
516 }
517
518 /* Get CPU type. */
519 get_amd_cpu (family, model);
520 /* Find available features. */
521 get_available_features (ecx, edx, max_level);
522 __cpu_model.__cpu_vendor = VENDOR_AMD;
523 }
524 else
525 __cpu_model.__cpu_vendor = VENDOR_OTHER;
526
527 gcc_assert (__cpu_model.__cpu_vendor < VENDOR_MAX);
528 gcc_assert (__cpu_model.__cpu_type < CPU_TYPE_MAX);
529 gcc_assert (__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
530
531 return 0;
532 }
533
/* When both SHARED and USE_ELF_SYMVER are defined, emit .symver
   directives that bind __cpu_indicator_init and __cpu_model to the
   GCC_4.8.0 symbol version.  */
#if defined (SHARED) && defined (USE_ELF_SYMVER)
__asm__ (".symver __cpu_indicator_init, __cpu_indicator_init@GCC_4.8.0");
__asm__ (".symver __cpu_model, __cpu_model@GCC_4.8.0");
#endif
538