1 /* Get CPU type and Features for x86 processors.
2    Copyright (C) 2012-2016 Free Software Foundation, Inc.
3    Contributed by Sriraman Tallam (tmsriram@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 #include "cpuid.h"
27 #include "tsystem.h"
28 #include "auto-target.h"
29 
/* Optional priority argument for the constructor attribute.  When the
   target supports init priorities the initializer is registered with
   priority 101; otherwise the macro expands to nothing and a plain
   constructor attribute is used.  */
#if defined (HAVE_INIT_PRIORITY)
# define CONSTRUCTOR_PRIORITY (101)
#else
# define CONSTRUCTOR_PRIORITY
#endif

/* Prototype of the initializer.  The constructor attribute is attached
   here as well as on the definition.  */
int __cpu_indicator_init (void)
  __attribute__ ((constructor CONSTRUCTOR_PRIORITY));
38 
/* Processor vendors.  These values are stored in
   __cpu_model.__cpu_vendor.  Numbering starts at 1, so a zero vendor
   field means that detection has not been performed yet.  */

enum processor_vendor
{
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  /* Sentinel: one past the last valid vendor.  */
  VENDOR_MAX
};
48 
/* Processor types, stored in __cpu_model.__cpu_type.  The numeric
   values are spelled out because they must not change: any new types
   or subtypes have to be inserted at the end, just before the
   sentinel.  */

enum processor_types
{
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  /* Sentinel: one past the last valid type.  */
  CPU_TYPE_MAX
};
65 
/* Processor subtypes, stored in __cpu_model.__cpu_subtype.  Intel and
   AMD entries are interleaved because new subtypes are only ever
   appended; the explicit values document the existing numbering.  */
enum processor_subtypes
{
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  /* Sentinel: one past the last valid subtype.  */
  CPU_SUBTYPE_MAX
};
86 
/* ISA features supported.  Each enumerator is the index of the bit that
   represents the feature in __cpu_model.__cpu_features[0].  New
   features have to be inserted at the end.  */

enum processor_features
{
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27
};
120 
/* CPU identification record filled in by __cpu_indicator_init.  */
struct __processor_model
{
  /* One of enum processor_vendor; zero until detection has run.  */
  unsigned int __cpu_vendor;
  /* One of enum processor_types, or zero when not recognized.  */
  unsigned int __cpu_type;
  /* One of enum processor_subtypes, or zero when not recognized.  */
  unsigned int __cpu_subtype;
  /* Bit mask indexed by enum processor_features.  */
  unsigned int __cpu_features[1];
};

/* The single, zero-initialized instance.  */
struct __processor_model __cpu_model = { };
128 
129 
130 /* Get the specific type of AMD CPU.  */
131 
132 static void
get_amd_cpu(unsigned int family,unsigned int model)133 get_amd_cpu (unsigned int family, unsigned int model)
134 {
135   switch (family)
136     {
137     /* AMD Family 10h.  */
138     case 0x10:
139       __cpu_model.__cpu_type = AMDFAM10H;
140       switch (model)
141 	{
142 	case 0x2:
143 	  /* Barcelona.  */
144 	  __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
145 	  break;
146 	case 0x4:
147 	  /* Shanghai.  */
148 	  __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
149 	  break;
150 	case 0x8:
151 	  /* Istanbul.  */
152 	  __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
153 	  break;
154 	default:
155 	  break;
156 	}
157       break;
158     /* AMD Family 14h "btver1". */
159     case 0x14:
160       __cpu_model.__cpu_type = AMD_BTVER1;
161       break;
162     /* AMD Family 15h "Bulldozer".  */
163     case 0x15:
164       __cpu_model.__cpu_type = AMDFAM15H;
165       /* Bulldozer version 1.  */
166       if ( model <= 0xf)
167 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
168       /* Bulldozer version 2 "Piledriver" */
169       if (model >= 0x10 && model <= 0x2f)
170 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
171       /* Bulldozer version 3 "Steamroller"  */
172       if (model >= 0x30 && model <= 0x4f)
173 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
174       /* Bulldozer version 4 "Excavator"   */
175       if (model >= 0x60 && model <= 0x7f)
176 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
177       break;
178     /* AMD Family 16h "btver2" */
179     case 0x16:
180       __cpu_model.__cpu_type = AMD_BTVER2;
181       break;
182     case 0x17:
183       __cpu_model.__cpu_type = AMDFAM17H;
184       /* AMD family 17h version 1.  */
185       if (model <= 0x1f)
186 	__cpu_model.__cpu_subtype = AMDFAM17H_ZNVER1;
187       break;
188     default:
189       break;
190     }
191 }
192 
193 /* Get the specific type of Intel CPU.  */
194 
195 static void
get_intel_cpu(unsigned int family,unsigned int model,unsigned int brand_id)196 get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
197 {
198   /* Parse family and model only if brand ID is 0. */
199   if (brand_id == 0)
200     {
201       switch (family)
202 	{
203 	case 0x5:
204 	  /* Pentium.  */
205 	  break;
206 	case 0x6:
207 	  switch (model)
208 	    {
209 	    case 0x1c:
210 	    case 0x26:
211 	      /* Bonnell.  */
212 	      __cpu_model.__cpu_type = INTEL_BONNELL;
213 	      break;
214 	    case 0x37:
215 	    case 0x4a:
216 	    case 0x4d:
217 	    case 0x5a:
218 	    case 0x5d:
219 	      /* Silvermont.  */
220 	      __cpu_model.__cpu_type = INTEL_SILVERMONT;
221 	      break;
222 	    case 0x57:
223 	      /* Knights Landing.  */
224 	      __cpu_model.__cpu_type = INTEL_KNL;
225 	      break;
226 	    case 0x1a:
227 	    case 0x1e:
228 	    case 0x1f:
229 	    case 0x2e:
230 	      /* Nehalem.  */
231 	      __cpu_model.__cpu_type = INTEL_COREI7;
232 	      __cpu_model.__cpu_subtype = INTEL_COREI7_NEHALEM;
233 	      break;
234 	    case 0x25:
235 	    case 0x2c:
236 	    case 0x2f:
237 	      /* Westmere.  */
238 	      __cpu_model.__cpu_type = INTEL_COREI7;
239 	      __cpu_model.__cpu_subtype = INTEL_COREI7_WESTMERE;
240 	      break;
241 	    case 0x2a:
242 	    case 0x2d:
243 	      /* Sandy Bridge.  */
244 	      __cpu_model.__cpu_type = INTEL_COREI7;
245 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
246 	      break;
247 	    case 0x3a:
248 	    case 0x3e:
249 	      /* Ivy Bridge.  */
250 	      __cpu_model.__cpu_type = INTEL_COREI7;
251 	      __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
252 	      break;
253 	    case 0x3c:
254 	    case 0x3f:
255 	    case 0x45:
256 	    case 0x46:
257 	      /* Haswell.  */
258 	      __cpu_model.__cpu_type = INTEL_COREI7;
259 	      __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
260 	      break;
261 	    case 0x3d:
262 	    case 0x47:
263 	    case 0x4f:
264 	    case 0x56:
265 	      /* Broadwell.  */
266 	      __cpu_model.__cpu_type = INTEL_COREI7;
267 	      __cpu_model.__cpu_subtype = INTEL_COREI7_BROADWELL;
268 	      break;
269 	    case 0x4e:
270 	    case 0x5e:
271 	      /* Skylake.  */
272 	      __cpu_model.__cpu_type = INTEL_COREI7;
273 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE;
274 	      break;
275 	    case 0x55:
276 	      /* Skylake with AVX-512 support.  */
277 	      __cpu_model.__cpu_type = INTEL_COREI7;
278 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE_AVX512;
279 	      break;
280 	    case 0x17:
281 	    case 0x1d:
282 	      /* Penryn.  */
283 	    case 0x0f:
284 	      /* Merom.  */
285 	      __cpu_model.__cpu_type = INTEL_CORE2;
286 	      break;
287 	    default:
288 	      break;
289 	    }
290 	  break;
291 	default:
292 	  /* We have no idea.  */
293 	  break;
294 	}
295     }
296 }
297 
298 /* ECX and EDX are output of CPUID at level one.  MAX_CPUID_LEVEL is
299    the max possible level of CPUID insn.  */
300 static void
get_available_features(unsigned int ecx,unsigned int edx,int max_cpuid_level)301 get_available_features (unsigned int ecx, unsigned int edx,
302 			int max_cpuid_level)
303 {
304   unsigned int features = 0;
305 
306   /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
307 #define XCR_XFEATURE_ENABLED_MASK	0x0
308 #define XSTATE_FP			0x1
309 #define XSTATE_SSE			0x2
310 #define XSTATE_YMM			0x4
311 #define XSTATE_OPMASK			0x20
312 #define XSTATE_ZMM			0x40
313 #define XSTATE_HI_ZMM			0x80
314 
315 #define XCR_AVX_ENABLED_MASK \
316   (XSTATE_SSE | XSTATE_YMM)
317 #define XCR_AVX512F_ENABLED_MASK \
318   (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
319 
320   /* Check if AVX and AVX512 are usable.  */
321   int avx_usable = 0;
322   int avx512_usable = 0;
323   if ((ecx & bit_OSXSAVE))
324     {
325       /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
326          ZMM16-ZMM31 states are supported by OSXSAVE.  */
327       unsigned int xcrlow;
328       unsigned int xcrhigh;
329       asm (".byte 0x0f, 0x01, 0xd0"
330 	   : "=a" (xcrlow), "=d" (xcrhigh)
331 	   : "c" (XCR_XFEATURE_ENABLED_MASK));
332       if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
333 	{
334 	  avx_usable = 1;
335 	  avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK)
336 			   == XCR_AVX512F_ENABLED_MASK);
337 	}
338     }
339 
340   if (edx & bit_CMOV)
341     features |= (1 << FEATURE_CMOV);
342   if (edx & bit_MMX)
343     features |= (1 << FEATURE_MMX);
344   if (edx & bit_SSE)
345     features |= (1 << FEATURE_SSE);
346   if (edx & bit_SSE2)
347     features |= (1 << FEATURE_SSE2);
348   if (ecx & bit_POPCNT)
349     features |= (1 << FEATURE_POPCNT);
350   if (ecx & bit_AES)
351     features |= (1 << FEATURE_AES);
352   if (ecx & bit_PCLMUL)
353     features |= (1 << FEATURE_PCLMUL);
354   if (ecx & bit_SSE3)
355     features |= (1 << FEATURE_SSE3);
356   if (ecx & bit_SSSE3)
357     features |= (1 << FEATURE_SSSE3);
358   if (ecx & bit_SSE4_1)
359     features |= (1 << FEATURE_SSE4_1);
360   if (ecx & bit_SSE4_2)
361     features |= (1 << FEATURE_SSE4_2);
362   if (avx_usable)
363     {
364       if (ecx & bit_AVX)
365 	features |= (1 << FEATURE_AVX);
366       if (ecx & bit_FMA)
367 	features |= (1 << FEATURE_FMA);
368     }
369 
370   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
371   if (max_cpuid_level >= 7)
372     {
373       unsigned int eax, ebx, ecx, edx;
374       __cpuid_count (7, 0, eax, ebx, ecx, edx);
375       if (ebx & bit_BMI)
376         features |= (1 << FEATURE_BMI);
377       if (avx_usable)
378 	{
379 	  if (ebx & bit_AVX2)
380 	    features |= (1 << FEATURE_AVX2);
381 	}
382       if (ebx & bit_BMI2)
383         features |= (1 << FEATURE_BMI2);
384       if (avx512_usable)
385 	{
386 	  if (ebx & bit_AVX512F)
387 	    features |= (1 << FEATURE_AVX512F);
388 	  if (ebx & bit_AVX512VL)
389 	    features |= (1 << FEATURE_AVX512VL);
390 	  if (ebx & bit_AVX512BW)
391 	    features |= (1 << FEATURE_AVX512BW);
392 	  if (ebx & bit_AVX512DQ)
393 	    features |= (1 << FEATURE_AVX512DQ);
394 	  if (ebx & bit_AVX512CD)
395 	    features |= (1 << FEATURE_AVX512CD);
396 	  if (ebx & bit_AVX512PF)
397 	    features |= (1 << FEATURE_AVX512PF);
398 	  if (ebx & bit_AVX512ER)
399 	    features |= (1 << FEATURE_AVX512ER);
400 	  if (ebx & bit_AVX512IFMA)
401 	    features |= (1 << FEATURE_AVX512IFMA);
402 	  if (ecx & bit_AVX512VBMI)
403 	    features |= (1 << FEATURE_AVX512VBMI);
404 	}
405     }
406 
407   unsigned int ext_level;
408   unsigned int eax, ebx;
409   /* Check cpuid level of extended features.  */
410   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
411 
412   if (ext_level > 0x80000000)
413     {
414       __cpuid (0x80000001, eax, ebx, ecx, edx);
415 
416       if (ecx & bit_SSE4a)
417 	features |= (1 << FEATURE_SSE4_A);
418       if (avx_usable)
419 	{
420 	  if (ecx & bit_FMA4)
421 	    features |= (1 << FEATURE_FMA4);
422 	  if (ecx & bit_XOP)
423 	    features |= (1 << FEATURE_XOP);
424 	}
425     }
426 
427   __cpu_model.__cpu_features[0] = features;
428 }
429 
/* Out-of-line wrapper around __get_cpuid; the arguments are passed
   through unchanged and its result is returned as is.  It is kept
   noinline because having many calls to cpuid in one function in
   32-bit mode causes GCC to complain:
   "can't find a register in class CLOBBERED_REGS".  This is
   related to PR rtl-optimization 44174.  */

static int __attribute__ ((noinline))
__get_cpuid_output (unsigned int __level,
		    unsigned int *__eax, unsigned int *__ebx,
		    unsigned int *__ecx, unsigned int *__edx)
{
  int status = __get_cpuid (__level, __eax, __ebx, __ecx, __edx);

  return status;
}
442 
443 
444 /* A constructor function that is sets __cpu_model and __cpu_features with
445    the right values.  This needs to run only once.  This constructor is
446    given the highest priority and it should run before constructors without
447    the priority set.  However, it still runs after ifunc initializers and
448    needs to be called explicitly there.  */
449 
450 int __attribute__ ((constructor CONSTRUCTOR_PRIORITY))
__cpu_indicator_init(void)451 __cpu_indicator_init (void)
452 {
453   unsigned int eax, ebx, ecx, edx;
454 
455   int max_level = 5;
456   unsigned int vendor;
457   unsigned int model, family, brand_id;
458   unsigned int extended_model, extended_family;
459 
460   /* This function needs to run just once.  */
461   if (__cpu_model.__cpu_vendor)
462     return 0;
463 
464   /* Assume cpuid insn present. Run in level 0 to get vendor id. */
465   if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
466     {
467       __cpu_model.__cpu_vendor = VENDOR_OTHER;
468       return -1;
469     }
470 
471   vendor = ebx;
472   max_level = eax;
473 
474   if (max_level < 1)
475     {
476       __cpu_model.__cpu_vendor = VENDOR_OTHER;
477       return -1;
478     }
479 
480   if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
481     {
482       __cpu_model.__cpu_vendor = VENDOR_OTHER;
483       return -1;
484     }
485 
486   model = (eax >> 4) & 0x0f;
487   family = (eax >> 8) & 0x0f;
488   brand_id = ebx & 0xff;
489   extended_model = (eax >> 12) & 0xf0;
490   extended_family = (eax >> 20) & 0xff;
491 
492   if (vendor == signature_INTEL_ebx)
493     {
494       /* Adjust model and family for Intel CPUS. */
495       if (family == 0x0f)
496 	{
497 	  family += extended_family;
498 	  model += extended_model;
499 	}
500       else if (family == 0x06)
501 	model += extended_model;
502 
503       /* Get CPU type.  */
504       get_intel_cpu (family, model, brand_id);
505       /* Find available features. */
506       get_available_features (ecx, edx, max_level);
507       __cpu_model.__cpu_vendor = VENDOR_INTEL;
508     }
509   else if (vendor == signature_AMD_ebx)
510     {
511       /* Adjust model and family for AMD CPUS. */
512       if (family == 0x0f)
513 	{
514 	  family += extended_family;
515 	  model += extended_model;
516 	}
517 
518       /* Get CPU type.  */
519       get_amd_cpu (family, model);
520       /* Find available features. */
521       get_available_features (ecx, edx, max_level);
522       __cpu_model.__cpu_vendor = VENDOR_AMD;
523     }
524   else
525     __cpu_model.__cpu_vendor = VENDOR_OTHER;
526 
527   gcc_assert (__cpu_model.__cpu_vendor < VENDOR_MAX);
528   gcc_assert (__cpu_model.__cpu_type < CPU_TYPE_MAX);
529   gcc_assert (__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
530 
531   return 0;
532 }
533 
/* When both SHARED and USE_ELF_SYMVER are defined, attach the symbol
   version GCC_4.8.0 to the two symbols exported from this file.  */
#ifdef SHARED
# ifdef USE_ELF_SYMVER
__asm__ (".symver __cpu_indicator_init, __cpu_indicator_init@GCC_4.8.0");
__asm__ (".symver __cpu_model, __cpu_model@GCC_4.8.0");
# endif
#endif
538