1 /* Subroutines for the gcc driver.
2    Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #define IN_TARGET_CODE 1
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 
27 const char *host_detect_local_cpu (int argc, const char **argv);
28 
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
31 
/* Parameters of one cache level, as filled in by the detection
   routines in this file.  */

struct cache_desc
{
  unsigned int sizekb;	/* Total cache size, in kilobytes.  */
  unsigned int assoc;	/* Associativity (number of ways).  */
  unsigned int line;	/* Cache line size, in bytes.  */
};
38 
39 /* Returns command line parameters that describe size and
40    cache line size of the processor caches.  */
41 
42 static char *
describe_cache(struct cache_desc level1,struct cache_desc level2)43 describe_cache (struct cache_desc level1, struct cache_desc level2)
44 {
45   char size[100], line[100], size2[100];
46 
47   /* At the moment, gcc does not use the information
48      about the associativity of the cache.  */
49 
50   snprintf (size, sizeof (size),
51 	    "--param l1-cache-size=%u ", level1.sizekb);
52   snprintf (line, sizeof (line),
53 	    "--param l1-cache-line-size=%u ", level1.line);
54 
55   snprintf (size2, sizeof (size2),
56 	    "--param l2-cache-size=%u ", level2.sizekb);
57 
58   return concat (size, line, size2, NULL);
59 }
60 
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006.  */
62 
63 static void
detect_l2_cache(struct cache_desc * level2)64 detect_l2_cache (struct cache_desc *level2)
65 {
66   unsigned eax, ebx, ecx, edx;
67   unsigned assoc;
68 
69   __cpuid (0x80000006, eax, ebx, ecx, edx);
70 
71   level2->sizekb = (ecx >> 16) & 0xffff;
72   level2->line = ecx & 0xff;
73 
74   assoc = (ecx >> 12) & 0xf;
75   if (assoc == 6)
76     assoc = 8;
77   else if (assoc == 8)
78     assoc = 16;
79   else if (assoc >= 0xa && assoc <= 0xc)
80     assoc = 32 + (assoc - 0xa) * 16;
81   else if (assoc >= 0xd && assoc <= 0xe)
82     assoc = 96 + (assoc - 0xd) * 32;
83 
84   level2->assoc = assoc;
85 }
86 
87 /* Returns the description of caches for an AMD processor.  */
88 
89 static const char *
detect_caches_amd(unsigned max_ext_level)90 detect_caches_amd (unsigned max_ext_level)
91 {
92   unsigned eax, ebx, ecx, edx;
93 
94   struct cache_desc level1, level2 = {0, 0, 0};
95 
96   if (max_ext_level < 0x80000005)
97     return "";
98 
99   __cpuid (0x80000005, eax, ebx, ecx, edx);
100 
101   level1.sizekb = (ecx >> 24) & 0xff;
102   level1.assoc = (ecx >> 16) & 0xff;
103   level1.line = ecx & 0xff;
104 
105   if (max_ext_level >= 0x80000006)
106     detect_l2_cache (&level2);
107 
108   return describe_cache (level1, level2);
109 }
110 
111 /* Decodes the size, the associativity and the cache line size of
112    L1/L2 caches of an Intel processor.  Values are based on
113    "Intel Processor Identification and the CPUID Instruction"
114    [Application Note 485], revision -032, December 2007.  */
115 
116 static void
decode_caches_intel(unsigned reg,bool xeon_mp,struct cache_desc * level1,struct cache_desc * level2)117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 		     struct cache_desc *level1, struct cache_desc *level2)
119 {
120   int i;
121 
122   for (i = 24; i >= 0; i -= 8)
123     switch ((reg >> i) & 0xff)
124       {
125       case 0x0a:
126 	level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 	break;
128       case 0x0c:
129 	level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 	break;
131       case 0x0d:
132 	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 	break;
134       case 0x0e:
135 	level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 	break;
137       case 0x21:
138 	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 	break;
140       case 0x24:
141 	level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 	break;
143       case 0x2c:
144 	level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 	break;
146       case 0x39:
147 	level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 	break;
149       case 0x3a:
150 	level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 	break;
152       case 0x3b:
153 	level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 	break;
155       case 0x3c:
156 	level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 	break;
158       case 0x3d:
159 	level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 	break;
161       case 0x3e:
162 	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 	break;
164       case 0x41:
165 	level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 	break;
167       case 0x42:
168 	level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 	break;
170       case 0x43:
171 	level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 	break;
173       case 0x44:
174 	level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 	break;
176       case 0x45:
177 	level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 	break;
179       case 0x48:
180 	level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 	break;
182       case 0x49:
183 	if (xeon_mp)
184 	  break;
185 	level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 	break;
187       case 0x4e:
188 	level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 	break;
190       case 0x60:
191 	level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 	break;
193       case 0x66:
194 	level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 	break;
196       case 0x67:
197 	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 	break;
199       case 0x68:
200 	level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 	break;
202       case 0x78:
203 	level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 	break;
205       case 0x79:
206 	level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 	break;
208       case 0x7a:
209 	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 	break;
211       case 0x7b:
212 	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 	break;
214       case 0x7c:
215 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 	break;
217       case 0x7d:
218 	level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 	break;
220       case 0x7f:
221 	level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 	break;
223       case 0x80:
224 	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 	break;
226       case 0x82:
227 	level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 	break;
229       case 0x83:
230 	level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 	break;
232       case 0x84:
233 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 	break;
235       case 0x85:
236 	level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 	break;
238       case 0x86:
239 	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 	break;
241       case 0x87:
242 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243 
244       default:
245 	break;
246       }
247 }
248 
/* Detect cache parameters using CPUID function 2.

   The low four bits of EAX from the first query give the number of
   times function 2 has to be executed; each of the four result
   registers then carries up to four descriptor bytes, which are
   handed to decode_caches_intel together with XEON_MP.  A register
   is skipped when it is zero or when its bit 31 is set.  Results are
   accumulated in *LEVEL1 and *LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int nreps, i;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  nreps = regs[0] & 0x0f;

  while (--nreps >= 0)
    {
      /* The low bits of EAX hold the repeat count, not a descriptor.
         Clear them after every query (not only the first one), so
         that the count is never decoded as a cache descriptor.  */
      regs[0] &= ~0x0fu;

      for (i = 0; i < 4; i++)
        if (regs[i] && !((regs[i] >> 31) & 1))
          decode_caches_intel (regs[i], xeon_mp, level1, level2);

      /* More queries were requested: fetch the next set.  */
      if (nreps)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
273 
274 /* Detect cache parameters using CPUID function 4. This
275    method doesn't require hardcoded tables.  */
276 
/* Cache type codes that detect_caches_cpuid4 compares against the low
   five bits of EAX returned by CPUID function 4.  The enumerators
   take the consecutive values 0 to 3.  */

enum cache_type
{
  CACHE_END,            /* 0: ends the enumeration.  */
  CACHE_DATA,           /* 1 */
  CACHE_INST,           /* 2 */
  CACHE_UNIFIED         /* 3 */
};
284 
285 static void
detect_caches_cpuid4(struct cache_desc * level1,struct cache_desc * level2,struct cache_desc * level3)286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 		      struct cache_desc *level3)
288 {
289   struct cache_desc *cache;
290 
291   unsigned eax, ebx, ecx, edx;
292   int count;
293 
294   for (count = 0;; count++)
295     {
296       __cpuid_count(4, count, eax, ebx, ecx, edx);
297       switch (eax & 0x1f)
298 	{
299 	case CACHE_END:
300 	  return;
301 	case CACHE_DATA:
302 	case CACHE_UNIFIED:
303 	  {
304 	    switch ((eax >> 5) & 0x07)
305 	      {
306 	      case 1:
307 		cache = level1;
308 		break;
309 	      case 2:
310 		cache = level2;
311 		break;
312 	      case 3:
313 		cache = level3;
314 		break;
315 	      default:
316 		cache = NULL;
317 	      }
318 
319 	    if (cache)
320 	      {
321 		unsigned sets = ecx + 1;
322 		unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323 
324 		cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 		cache->line = (ebx & 0x0fff) + 1;
326 
327 		cache->sizekb = (cache->assoc * part
328 				 * cache->line * sets) / 1024;
329 	      }
330 	  }
331 	default:
332 	  break;
333 	}
334     }
335 }
336 
337 /* Returns the description of caches for an Intel processor.  */
338 
339 static const char *
detect_caches_intel(bool xeon_mp,unsigned max_level,unsigned max_ext_level,unsigned * l2sizekb)340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 		     unsigned max_ext_level, unsigned *l2sizekb)
342 {
343   struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344 
345   if (max_level >= 4)
346     detect_caches_cpuid4 (&level1, &level2, &level3);
347   else if (max_level >= 2)
348     detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349   else
350     return "";
351 
352   if (level1.sizekb == 0)
353     return "";
354 
355   /* Let the L3 replace the L2. This assumes inclusive caches
356      and single threaded program for now. */
357   if (level3.sizekb)
358     level2 = level3;
359 
360   /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
361      method if other methods fail to provide L2 cache parameters.  */
362   if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363     detect_l2_cache (&level2);
364 
365   *l2sizekb = level2.sizekb;
366 
367   return describe_cache (level1, level2);
368 }
369 
370 /* This will be called by the spec parser in gcc.c when it sees
371    a %:local_cpu_detect(args) construct.  Currently it will be called
372    with either "arch" or "tune" as argument depending on if -march=native
373    or -mtune=native is to be substituted.
374 
   It returns a string containing new command line parameters to be
   put in place of the above two options, depending on which CPU
   this is executed on.  E.g. "-march=k8" on an AMD64 machine
   for -march=native.
379 
380    ARGC and ARGV are set depending on the actual arguments given
381    in the spec.  */
382 
host_detect_local_cpu(int argc,const char ** argv)383 const char *host_detect_local_cpu (int argc, const char **argv)
384 {
385   enum processor_type processor = PROCESSOR_I386;
386   const char *cpu = "i386";
387 
388   const char *cache = "";
389   const char *options = "";
390 
391   unsigned int eax, ebx, ecx, edx;
392 
393   unsigned int max_level, ext_level;
394 
395   unsigned int vendor;
396   unsigned int model, family;
397 
398   unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399   unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400 
401   /* Extended features */
402   unsigned int has_lahf_lm = 0, has_sse4a = 0;
403   unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404   unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408   unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409   unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410   unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411   unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412   unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413   unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414   unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415   unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416   unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417   unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418   unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419   unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420   unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421   unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422   unsigned int has_avx512bitalg = 0;
423   unsigned int has_shstk = 0;
424   unsigned int has_avx512vnni = 0, has_vaes = 0;
425   unsigned int has_vpclmulqdq = 0;
426   unsigned int has_movdiri = 0, has_movdir64b = 0;
427 
428   bool arch;
429 
430   unsigned int l2sizekb = 0;
431 
432   if (argc < 1)
433     return NULL;
434 
435   arch = !strcmp (argv[0], "arch");
436 
437   if (!arch && strcmp (argv[0], "tune"))
438     return NULL;
439 
440   max_level = __get_cpuid_max (0, &vendor);
441   if (max_level < 1)
442     goto done;
443 
444   __cpuid (1, eax, ebx, ecx, edx);
445 
446   model = (eax >> 4) & 0x0f;
447   family = (eax >> 8) & 0x0f;
448   if (vendor == signature_INTEL_ebx
449       || vendor == signature_AMD_ebx)
450     {
451       unsigned int extended_model, extended_family;
452 
453       extended_model = (eax >> 12) & 0xf0;
454       extended_family = (eax >> 20) & 0xff;
455       if (family == 0x0f)
456 	{
457 	  family += extended_family;
458 	  model += extended_model;
459 	}
460       else if (family == 0x06)
461 	model += extended_model;
462     }
463 
464   has_sse3 = ecx & bit_SSE3;
465   has_ssse3 = ecx & bit_SSSE3;
466   has_sse4_1 = ecx & bit_SSE4_1;
467   has_sse4_2 = ecx & bit_SSE4_2;
468   has_avx = ecx & bit_AVX;
469   has_osxsave = ecx & bit_OSXSAVE;
470   has_cmpxchg16b = ecx & bit_CMPXCHG16B;
471   has_movbe = ecx & bit_MOVBE;
472   has_popcnt = ecx & bit_POPCNT;
473   has_aes = ecx & bit_AES;
474   has_pclmul = ecx & bit_PCLMUL;
475   has_fma = ecx & bit_FMA;
476   has_f16c = ecx & bit_F16C;
477   has_rdrnd = ecx & bit_RDRND;
478   has_xsave = ecx & bit_XSAVE;
479 
480   has_cmpxchg8b = edx & bit_CMPXCHG8B;
481   has_cmov = edx & bit_CMOV;
482   has_mmx = edx & bit_MMX;
483   has_fxsr = edx & bit_FXSAVE;
484   has_sse = edx & bit_SSE;
485   has_sse2 = edx & bit_SSE2;
486 
487   if (max_level >= 7)
488     {
489       __cpuid_count (7, 0, eax, ebx, ecx, edx);
490 
491       has_bmi = ebx & bit_BMI;
492       has_sgx = ebx & bit_SGX;
493       has_hle = ebx & bit_HLE;
494       has_rtm = ebx & bit_RTM;
495       has_avx2 = ebx & bit_AVX2;
496       has_bmi2 = ebx & bit_BMI2;
497       has_fsgsbase = ebx & bit_FSGSBASE;
498       has_rdseed = ebx & bit_RDSEED;
499       has_adx = ebx & bit_ADX;
500       has_avx512f = ebx & bit_AVX512F;
501       has_avx512er = ebx & bit_AVX512ER;
502       has_avx512pf = ebx & bit_AVX512PF;
503       has_avx512cd = ebx & bit_AVX512CD;
504       has_sha = ebx & bit_SHA;
505       has_clflushopt = ebx & bit_CLFLUSHOPT;
506       has_clwb = ebx & bit_CLWB;
507       has_avx512dq = ebx & bit_AVX512DQ;
508       has_avx512bw = ebx & bit_AVX512BW;
509       has_avx512vl = ebx & bit_AVX512VL;
510       has_avx512ifma = ebx & bit_AVX512IFMA;
511 
512       has_prefetchwt1 = ecx & bit_PREFETCHWT1;
513       has_avx512vbmi = ecx & bit_AVX512VBMI;
514       has_pku = ecx & bit_OSPKE;
515       has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
516       has_avx512vnni = ecx & bit_AVX512VNNI;
517       has_rdpid = ecx & bit_RDPID;
518       has_gfni = ecx & bit_GFNI;
519       has_vaes = ecx & bit_VAES;
520       has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
521       has_avx512bitalg = ecx & bit_AVX512BITALG;
522       has_movdiri = ecx & bit_MOVDIRI;
523       has_movdir64b = ecx & bit_MOVDIR64B;
524 
525       has_avx5124vnniw = edx & bit_AVX5124VNNIW;
526       has_avx5124fmaps = edx & bit_AVX5124FMAPS;
527 
528       has_shstk = ecx & bit_SHSTK;
529       has_pconfig = edx & bit_PCONFIG;
530     }
531 
532   if (max_level >= 13)
533     {
534       __cpuid_count (13, 1, eax, ebx, ecx, edx);
535 
536       has_xsaveopt = eax & bit_XSAVEOPT;
537       has_xsavec = eax & bit_XSAVEC;
538       has_xsaves = eax & bit_XSAVES;
539     }
540 
541   /* Check cpuid level of extended features.  */
542   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
543 
544   if (ext_level >= 0x80000001)
545     {
546       __cpuid (0x80000001, eax, ebx, ecx, edx);
547 
548       has_lahf_lm = ecx & bit_LAHF_LM;
549       has_sse4a = ecx & bit_SSE4a;
550       has_abm = ecx & bit_ABM;
551       has_lwp = ecx & bit_LWP;
552       has_fma4 = ecx & bit_FMA4;
553       has_xop = ecx & bit_XOP;
554       has_tbm = ecx & bit_TBM;
555       has_lzcnt = ecx & bit_LZCNT;
556       has_prfchw = ecx & bit_PRFCHW;
557 
558       has_longmode = edx & bit_LM;
559       has_3dnowp = edx & bit_3DNOWP;
560       has_3dnow = edx & bit_3DNOW;
561       has_mwaitx = ecx & bit_MWAITX;
562     }
563 
564   if (ext_level >= 0x80000008)
565     {
566       __cpuid (0x80000008, eax, ebx, ecx, edx);
567       has_clzero = ebx & bit_CLZERO;
568       has_wbnoinvd = ebx & bit_WBNOINVD;
569     }
570 
571   /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
572 #define XCR_XFEATURE_ENABLED_MASK	0x0
573 #define XSTATE_FP			0x1
574 #define XSTATE_SSE			0x2
575 #define XSTATE_YMM			0x4
576 #define XSTATE_OPMASK			0x20
577 #define XSTATE_ZMM			0x40
578 #define XSTATE_HI_ZMM			0x80
579 
580 #define XCR_AVX_ENABLED_MASK \
581   (XSTATE_SSE | XSTATE_YMM)
582 #define XCR_AVX512F_ENABLED_MASK \
583   (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
584 
585   if (has_osxsave)
586     asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
587 	 : "=a" (eax), "=d" (edx)
588 	 : "c" (XCR_XFEATURE_ENABLED_MASK));
589   else
590     eax = 0;
591 
592   /* Check if AVX registers are supported.  */
593   if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
594     {
595       has_avx = 0;
596       has_avx2 = 0;
597       has_fma = 0;
598       has_fma4 = 0;
599       has_f16c = 0;
600       has_xop = 0;
601       has_xsave = 0;
602       has_xsaveopt = 0;
603       has_xsaves = 0;
604       has_xsavec = 0;
605     }
606 
607   /* Check if AVX512F registers are supported.  */
608   if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
609     {
610       has_avx512f = 0;
611       has_avx512er = 0;
612       has_avx512pf = 0;
613       has_avx512cd = 0;
614       has_avx512dq = 0;
615       has_avx512bw = 0;
616       has_avx512vl = 0;
617     }
618 
619   if (!arch)
620     {
621       if (vendor == signature_AMD_ebx
622 	  || vendor == signature_CENTAUR_ebx
623 	  || vendor == signature_CYRIX_ebx
624 	  || vendor == signature_NSC_ebx)
625 	cache = detect_caches_amd (ext_level);
626       else if (vendor == signature_INTEL_ebx)
627 	{
628 	  bool xeon_mp = (family == 15 && model == 6);
629 	  cache = detect_caches_intel (xeon_mp, max_level,
630 				       ext_level, &l2sizekb);
631 	}
632     }
633 
634   if (vendor == signature_AMD_ebx)
635     {
636       unsigned int name;
637 
638       /* Detect geode processor by its processor signature.  */
639       if (ext_level >= 0x80000002)
640 	__cpuid (0x80000002, name, ebx, ecx, edx);
641       else
642 	name = 0;
643 
644       if (name == signature_NSC_ebx)
645 	processor = PROCESSOR_GEODE;
646       else if (has_movbe && family == 22)
647 	processor = PROCESSOR_BTVER2;
648       else if (has_clzero)
649 	processor = PROCESSOR_ZNVER1;
650       else if (has_avx2)
651         processor = PROCESSOR_BDVER4;
652       else if (has_xsaveopt)
653         processor = PROCESSOR_BDVER3;
654       else if (has_bmi)
655         processor = PROCESSOR_BDVER2;
656       else if (has_xop)
657 	processor = PROCESSOR_BDVER1;
658       else if (has_sse4a && has_ssse3)
659         processor = PROCESSOR_BTVER1;
660       else if (has_sse4a)
661 	processor = PROCESSOR_AMDFAM10;
662       else if (has_sse2 || has_longmode)
663 	processor = PROCESSOR_K8;
664       else if (has_3dnowp && family == 6)
665 	processor = PROCESSOR_ATHLON;
666       else if (has_mmx)
667 	processor = PROCESSOR_K6;
668       else
669 	processor = PROCESSOR_PENTIUM;
670     }
671   else if (vendor == signature_CENTAUR_ebx)
672     {
673       processor = PROCESSOR_GENERIC;
674 
675       switch (family)
676 	{
677 	default:
678 	  /* We have no idea.  */
679 	  break;
680 
681 	case 5:
682 	  if (has_3dnow || has_mmx)
683 	    processor = PROCESSOR_I486;
684 	  break;
685 
686 	case 6:
687 	  if (has_longmode)
688 	    processor = PROCESSOR_K8;
689 	  else if (model >= 9)
690 	    processor = PROCESSOR_PENTIUMPRO;
691 	  else if (model >= 6)
692 	    processor = PROCESSOR_I486;
693 	}
694     }
695   else
696     {
697       switch (family)
698 	{
699 	case 4:
700 	  processor = PROCESSOR_I486;
701 	  break;
702 	case 5:
703 	  processor = PROCESSOR_PENTIUM;
704 	  break;
705 	case 6:
706 	  processor = PROCESSOR_PENTIUMPRO;
707 	  break;
708 	case 15:
709 	  processor = PROCESSOR_PENTIUM4;
710 	  break;
711 	default:
712 	  /* We have no idea.  */
713 	  processor = PROCESSOR_GENERIC;
714 	}
715     }
716 
717   switch (processor)
718     {
719     case PROCESSOR_I386:
720       /* Default.  */
721       break;
722     case PROCESSOR_I486:
723       if (arch && vendor == signature_CENTAUR_ebx)
724 	{
725 	  if (model >= 6)
726 	    cpu = "c3";
727 	  else if (has_3dnow)
728 	    cpu = "winchip2";
729 	  else
730 	    /* Assume WinChip C6.  */
731 	    cpu = "winchip-c6";
732 	}
733       else
734 	cpu = "i486";
735       break;
736     case PROCESSOR_PENTIUM:
737       if (arch && has_mmx)
738 	cpu = "pentium-mmx";
739       else
740 	cpu = "pentium";
741       break;
742     case PROCESSOR_PENTIUMPRO:
743       switch (model)
744 	{
745 	case 0x1c:
746 	case 0x26:
747 	  /* Bonnell.  */
748 	  cpu = "bonnell";
749 	  break;
750 	case 0x37:
751 	case 0x4a:
752 	case 0x4d:
753 	case 0x5a:
754 	case 0x5d:
755 	  /* Silvermont.  */
756 	  cpu = "silvermont";
757 	  break;
758 	case 0x0f:
759 	  /* Merom.  */
760 	case 0x17:
761 	case 0x1d:
762 	  /* Penryn.  */
763 	  cpu = "core2";
764 	  break;
765 	case 0x1a:
766 	case 0x1e:
767 	case 0x1f:
768 	case 0x2e:
769 	  /* Nehalem.  */
770 	  cpu = "nehalem";
771 	  break;
772 	case 0x25:
773 	case 0x2c:
774 	case 0x2f:
775 	  /* Westmere.  */
776 	  cpu = "westmere";
777 	  break;
778 	case 0x2a:
779 	case 0x2d:
780 	  /* Sandy Bridge.  */
781 	  cpu = "sandybridge";
782 	  break;
783 	case 0x3a:
784 	case 0x3e:
785 	  /* Ivy Bridge.  */
786 	  cpu = "ivybridge";
787 	  break;
788 	case 0x3c:
789 	case 0x3f:
790 	case 0x45:
791 	case 0x46:
792 	  /* Haswell.  */
793 	  cpu = "haswell";
794 	  break;
795 	case 0x3d:
796 	case 0x47:
797 	case 0x4f:
798 	case 0x56:
799 	  /* Broadwell.  */
800 	  cpu = "broadwell";
801 	  break;
802 	case 0x4e:
803 	case 0x5e:
804 	  /* Skylake.  */
805 	case 0x8e:
806 	case 0x9e:
807 	  /* Kaby Lake.  */
808 	  cpu = "skylake";
809 	  break;
810 	case 0x55:
811 	  /* Skylake with AVX-512.  */
812 	  cpu = "skylake-avx512";
813 	  break;
814 	case 0x57:
815 	  /* Knights Landing.  */
816 	  cpu = "knl";
817 	  break;
818 	case 0x66:
819 	  /* Cannon Lake.  */
820 	  cpu = "cannonlake";
821 	  break;
822 	case 0x85:
823 	  /* Knights Mill.  */
824 	  cpu = "knm";
825 	  break;
826 	default:
827 	  if (arch)
828 	    {
829 	      /* This is unknown family 0x6 CPU.  */
830 	      /* Assume Ice Lake Server.  */
831 	      if (has_wbnoinvd)
832 		cpu = "icelake-server";
833 	      /* Assume Ice Lake.  */
834 	      else if (has_gfni)
835 		cpu = "icelake-client";
836 	      /* Assume Cannon Lake.  */
837 	      else if (has_avx512vbmi)
838 		cpu = "cannonlake";
839 	      /* Assume Knights Mill.  */
840 	      else if (has_avx5124vnniw)
841 		cpu = "knm";
842 	      /* Assume Knights Landing.  */
843 	      else if (has_avx512er)
844 		cpu = "knl";
845 	      /* Assume Skylake with AVX-512.  */
846 	      else if (has_avx512f)
847 		cpu = "skylake-avx512";
848 	      /* Assume Skylake.  */
849 	      else if (has_clflushopt)
850 		cpu = "skylake";
851 	      /* Assume Broadwell.  */
852 	      else if (has_adx)
853 		cpu = "broadwell";
854 	      else if (has_avx2)
855 		/* Assume Haswell.  */
856 		cpu = "haswell";
857 	      else if (has_avx)
858 		/* Assume Sandy Bridge.  */
859 		cpu = "sandybridge";
860 	      else if (has_sse4_2)
861 		{
862 		  if (has_movbe)
863 		    /* Assume Silvermont.  */
864 		    cpu = "silvermont";
865 		  else
866 		    /* Assume Nehalem.  */
867 		    cpu = "nehalem";
868 		}
869 	      else if (has_ssse3)
870 		{
871 		  if (has_movbe)
872 		    /* Assume Bonnell.  */
873 		    cpu = "bonnell";
874 		  else
875 		    /* Assume Core 2.  */
876 		    cpu = "core2";
877 		}
878 	      else if (has_longmode)
879 		/* Perhaps some emulator?  Assume x86-64, otherwise gcc
880 		   -march=native would be unusable for 64-bit compilations,
881 		   as all the CPUs below are 32-bit only.  */
882 		cpu = "x86-64";
883 	      else if (has_sse3)
884 		{
885 		  if (vendor == signature_CENTAUR_ebx)
886 		    /* C7 / Eden "Esther" */
887 		    cpu = "c7";
888 		  else
889 		    /* It is Core Duo.  */
890 		    cpu = "pentium-m";
891 		}
892 	      else if (has_sse2)
893 		/* It is Pentium M.  */
894 		cpu = "pentium-m";
895 	      else if (has_sse)
896 		{
897 		  if (vendor == signature_CENTAUR_ebx)
898 		    {
899 		      if (model >= 9)
900 			/* Eden "Nehemiah" */
901 			cpu = "nehemiah";
902 		      else
903 			cpu = "c3-2";
904 		    }
905 		  else
906 		    /* It is Pentium III.  */
907 		    cpu = "pentium3";
908 		}
909 	      else if (has_mmx)
910 		/* It is Pentium II.  */
911 		cpu = "pentium2";
912 	      else
913 		/* Default to Pentium Pro.  */
914 		cpu = "pentiumpro";
915 	    }
916 	  else
917 	    /* For -mtune, we default to -mtune=generic.  */
918 	    cpu = "generic";
919 	  break;
920 	}
921       break;
922     case PROCESSOR_PENTIUM4:
923       if (has_sse3)
924 	{
925 	  if (has_longmode)
926 	    cpu = "nocona";
927 	  else
928 	    cpu = "prescott";
929 	}
930       else
931 	cpu = "pentium4";
932       break;
933     case PROCESSOR_GEODE:
934       cpu = "geode";
935       break;
936     case PROCESSOR_K6:
937       if (arch && has_3dnow)
938 	cpu = "k6-3";
939       else
940 	cpu = "k6";
941       break;
942     case PROCESSOR_ATHLON:
943       if (arch && has_sse)
944 	cpu = "athlon-4";
945       else
946 	cpu = "athlon";
947       break;
948     case PROCESSOR_K8:
949       if (arch)
950 	{
951 	  if (vendor == signature_CENTAUR_ebx)
952 	    {
953 	      if (has_sse4_1)
954 		/* Nano 3000 | Nano dual / quad core | Eden X4 */
955 		cpu = "nano-3000";
956 	      else if (has_ssse3)
957 		/* Nano 1000 | Nano 2000 */
958 		cpu = "nano";
959 	      else if (has_sse3)
960 		/* Eden X2 */
961 		cpu = "eden-x2";
962 	      else
963 		/* Default to k8 */
964 		cpu = "k8";
965 	    }
966 	  else if (has_sse3)
967 	    cpu = "k8-sse3";
968 	  else
969 	    cpu = "k8";
970 	}
971       else
972 	/* For -mtune, we default to -mtune=k8 */
973 	cpu = "k8";
974       break;
975     case PROCESSOR_AMDFAM10:
976       cpu = "amdfam10";
977       break;
978     case PROCESSOR_BDVER1:
979       cpu = "bdver1";
980       break;
981     case PROCESSOR_BDVER2:
982       cpu = "bdver2";
983       break;
984     case PROCESSOR_BDVER3:
985       cpu = "bdver3";
986       break;
987     case PROCESSOR_BDVER4:
988       cpu = "bdver4";
989       break;
990     case PROCESSOR_ZNVER1:
991       cpu = "znver1";
992       break;
993     case PROCESSOR_BTVER1:
994       cpu = "btver1";
995       break;
996     case PROCESSOR_BTVER2:
997       cpu = "btver2";
998       break;
999 
1000     default:
1001       /* Use something reasonable.  */
1002       if (arch)
1003 	{
1004 	  if (has_ssse3)
1005 	    cpu = "core2";
1006 	  else if (has_sse3)
1007 	    {
1008 	      if (has_longmode)
1009 		cpu = "nocona";
1010 	      else
1011 		cpu = "prescott";
1012 	    }
1013 	  else if (has_longmode)
1014 	    /* Perhaps some emulator?  Assume x86-64, otherwise gcc
1015 	       -march=native would be unusable for 64-bit compilations,
1016 	       as all the CPUs below are 32-bit only.  */
1017 	    cpu = "x86-64";
1018 	  else if (has_sse2)
1019 	    cpu = "pentium4";
1020 	  else if (has_cmov)
1021 	    cpu = "pentiumpro";
1022 	  else if (has_mmx)
1023 	    cpu = "pentium-mmx";
1024 	  else if (has_cmpxchg8b)
1025 	    cpu = "pentium";
1026 	}
1027       else
1028 	cpu = "generic";
1029     }
1030 
1031   if (arch)
1032     {
1033       const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1034       const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1035       const char *sse = has_sse ? " -msse" : " -mno-sse";
1036       const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1037       const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1038       const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1039       const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1040       const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1041       const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1042       const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1043       const char *aes = has_aes ? " -maes" : " -mno-aes";
1044       const char *sha = has_sha ? " -msha" : " -mno-sha";
1045       const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1046       const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1047       const char *abm = has_abm ? " -mabm" : " -mno-abm";
1048       const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1049       const char *fma = has_fma ? " -mfma" : " -mno-fma";
1050       const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1051       const char *xop = has_xop ? " -mxop" : " -mno-xop";
1052       const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1053       const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1054       const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1055       const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1056       const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1057       const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1058       const char *avx = has_avx ? " -mavx" : " -mno-avx";
1059       const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1060       const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1061       const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1062       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1063       const char *hle = has_hle ? " -mhle" : " -mno-hle";
1064       const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1065       const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1066       const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1067       const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1068       const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1069       const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1070       const char *adx = has_adx ? " -madx" : " -mno-adx";
1071       const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1072       const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1073       const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1074       const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1075       const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1076       const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1077       const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1078       const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1079       const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1080       const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1081       const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1082       const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1083       const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1084       const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1085       const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1086       const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1087       const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1088       const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1089       const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1090       const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1091       const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1092       const char *mwaitx  = has_mwaitx  ? " -mmwaitx"  : " -mno-mwaitx";
1093       const char *clzero  = has_clzero  ? " -mclzero"  : " -mno-clzero";
1094       const char *pku = has_pku ? " -mpku" : " -mno-pku";
1095       const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1096       const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1097       const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1098       const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1099       const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1100       const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1101       const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1102       const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1103       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1104 			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1105 			popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1106 			pconfig, wbnoinvd,
1107 			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1108 			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1109 			fxsr, xsave, xsaveopt, avx512f, avx512er,
1110 			avx512cd, avx512pf, prefetchwt1, clflushopt,
1111 			xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1112 			avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1113 			clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1114 			avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1115 			avx512bitalg, movdiri, movdir64b, NULL);
1116     }
1117 
1118 done:
1119   return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1120 }
1121 #else
1122 
/* If we are compiling with a GCC where the %EBX register is fixed, the
   driver simply ignores the "native" target of -march and -mtune and
   leaves it to the newly built compiler to generate code for its
   default target.  */
1126 
/* Fallback definition of host_detect_local_cpu.  ARGC and ARGV are
   accepted only so the signature matches the file-level prototype;
   neither is inspected.  Always returns NULL.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  /* The parameters are named to agree with the prototype and then
     explicitly discarded, which keeps the definition valid in C
     dialects that require parameter names while staying quiet under
     unused-parameter warnings.  */
  (void) argc;
  (void) argv;

  return NULL;
}
1131 #endif /* __GNUC__ */
1132