1 /* Subroutines for the gcc driver.
2    Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 
25 const char *host_detect_local_cpu (int argc, const char **argv);
26 
27 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28 #include "cpuid.h"
29 
/* Parameters of a single cache level as gathered from CPUID.  The
   field order matters: the structure is initialized positionally
   elsewhere in this file.  */
struct cache_desc
{
  unsigned int sizekb;	/* Cache size, in kilobytes.  */
  unsigned int assoc;	/* Associativity (number of ways).  */
  unsigned int line;	/* Cache line size.  */
};
36 
37 /* Returns command line parameters that describe size and
38    cache line size of the processor caches.  */
39 
40 static char *
describe_cache(struct cache_desc level1,struct cache_desc level2)41 describe_cache (struct cache_desc level1, struct cache_desc level2)
42 {
43   char size[100], line[100], size2[100];
44 
45   /* At the moment, gcc does not use the information
46      about the associativity of the cache.  */
47 
48   snprintf (size, sizeof (size),
49 	    "--param l1-cache-size=%u ", level1.sizekb);
50   snprintf (line, sizeof (line),
51 	    "--param l1-cache-line-size=%u ", level1.line);
52 
53   snprintf (size2, sizeof (size2),
54 	    "--param l2-cache-size=%u ", level2.sizekb);
55 
56   return concat (size, line, size2, NULL);
57 }
58 
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006.  */
60 
61 static void
detect_l2_cache(struct cache_desc * level2)62 detect_l2_cache (struct cache_desc *level2)
63 {
64   unsigned eax, ebx, ecx, edx;
65   unsigned assoc;
66 
67   __cpuid (0x80000006, eax, ebx, ecx, edx);
68 
69   level2->sizekb = (ecx >> 16) & 0xffff;
70   level2->line = ecx & 0xff;
71 
72   assoc = (ecx >> 12) & 0xf;
73   if (assoc == 6)
74     assoc = 8;
75   else if (assoc == 8)
76     assoc = 16;
77   else if (assoc >= 0xa && assoc <= 0xc)
78     assoc = 32 + (assoc - 0xa) * 16;
79   else if (assoc >= 0xd && assoc <= 0xe)
80     assoc = 96 + (assoc - 0xd) * 32;
81 
82   level2->assoc = assoc;
83 }
84 
85 /* Returns the description of caches for an AMD processor.  */
86 
87 static const char *
detect_caches_amd(unsigned max_ext_level)88 detect_caches_amd (unsigned max_ext_level)
89 {
90   unsigned eax, ebx, ecx, edx;
91 
92   struct cache_desc level1, level2 = {0, 0, 0};
93 
94   if (max_ext_level < 0x80000005)
95     return "";
96 
97   __cpuid (0x80000005, eax, ebx, ecx, edx);
98 
99   level1.sizekb = (ecx >> 24) & 0xff;
100   level1.assoc = (ecx >> 16) & 0xff;
101   level1.line = ecx & 0xff;
102 
103   if (max_ext_level >= 0x80000006)
104     detect_l2_cache (&level2);
105 
106   return describe_cache (level1, level2);
107 }
108 
109 /* Decodes the size, the associativity and the cache line size of
110    L1/L2 caches of an Intel processor.  Values are based on
111    "Intel Processor Identification and the CPUID Instruction"
112    [Application Note 485], revision -032, December 2007.  */
113 
114 static void
decode_caches_intel(unsigned reg,bool xeon_mp,struct cache_desc * level1,struct cache_desc * level2)115 decode_caches_intel (unsigned reg, bool xeon_mp,
116 		     struct cache_desc *level1, struct cache_desc *level2)
117 {
118   int i;
119 
120   for (i = 24; i >= 0; i -= 8)
121     switch ((reg >> i) & 0xff)
122       {
123       case 0x0a:
124 	level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 	break;
126       case 0x0c:
127 	level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 	break;
129       case 0x0d:
130 	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 	break;
132       case 0x0e:
133 	level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 	break;
135       case 0x21:
136 	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 	break;
138       case 0x24:
139 	level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 	break;
141       case 0x2c:
142 	level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 	break;
144       case 0x39:
145 	level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 	break;
147       case 0x3a:
148 	level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 	break;
150       case 0x3b:
151 	level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 	break;
153       case 0x3c:
154 	level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 	break;
156       case 0x3d:
157 	level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 	break;
159       case 0x3e:
160 	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 	break;
162       case 0x41:
163 	level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 	break;
165       case 0x42:
166 	level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 	break;
168       case 0x43:
169 	level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 	break;
171       case 0x44:
172 	level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 	break;
174       case 0x45:
175 	level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 	break;
177       case 0x48:
178 	level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 	break;
180       case 0x49:
181 	if (xeon_mp)
182 	  break;
183 	level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 	break;
185       case 0x4e:
186 	level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 	break;
188       case 0x60:
189 	level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 	break;
191       case 0x66:
192 	level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 	break;
194       case 0x67:
195 	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 	break;
197       case 0x68:
198 	level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 	break;
200       case 0x78:
201 	level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 	break;
203       case 0x79:
204 	level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 	break;
206       case 0x7a:
207 	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 	break;
209       case 0x7b:
210 	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 	break;
212       case 0x7c:
213 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 	break;
215       case 0x7d:
216 	level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 	break;
218       case 0x7f:
219 	level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 	break;
221       case 0x80:
222 	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 	break;
224       case 0x82:
225 	level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 	break;
227       case 0x83:
228 	level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 	break;
230       case 0x84:
231 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 	break;
233       case 0x85:
234 	level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 	break;
236       case 0x86:
237 	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 	break;
239       case 0x87:
240 	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241 
242       default:
243 	break;
244       }
245 }
246 
/* Detect cache parameters using CPUID function 2.  The low four bits
   of EAX are taken as the number of times the function has to be
   executed; every returned register that is non-zero and has bit 31
   clear is handed to decode_caches_intel together with XEON_MP,
   LEVEL1 and LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* Split the repeat count off EAX so that it is not decoded as a
     descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
	{
	  if (regs[r] == 0)
	    continue;

	  /* Bit 31 set: this register is skipped.  */
	  if ((regs[r] >> 31) & 1)
	    continue;

	  decode_caches_intel (regs[r], xeon_mp, level1, level2);
	}

      /* Query again unless this was the last round.  */
      if (remaining > 1)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
271 
272 /* Detect cache parameters using CPUID function 4. This
273    method doesn't require hardcoded tables.  */
274 
/* Cache type codes, matched against the low five bits of EAX as
   returned by CPUID function 4.  CACHE_END terminates the
   enumeration.  */
enum cache_type
{
  CACHE_END = 0,
  CACHE_DATA,		/* 1 */
  CACHE_INST,		/* 2 */
  CACHE_UNIFIED		/* 3 */
};
282 
283 static void
detect_caches_cpuid4(struct cache_desc * level1,struct cache_desc * level2,struct cache_desc * level3)284 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 		      struct cache_desc *level3)
286 {
287   struct cache_desc *cache;
288 
289   unsigned eax, ebx, ecx, edx;
290   int count;
291 
292   for (count = 0;; count++)
293     {
294       __cpuid_count(4, count, eax, ebx, ecx, edx);
295       switch (eax & 0x1f)
296 	{
297 	case CACHE_END:
298 	  return;
299 	case CACHE_DATA:
300 	case CACHE_UNIFIED:
301 	  {
302 	    switch ((eax >> 5) & 0x07)
303 	      {
304 	      case 1:
305 		cache = level1;
306 		break;
307 	      case 2:
308 		cache = level2;
309 		break;
310 	      case 3:
311 		cache = level3;
312 		break;
313 	      default:
314 		cache = NULL;
315 	      }
316 
317 	    if (cache)
318 	      {
319 		unsigned sets = ecx + 1;
320 		unsigned part = ((ebx >> 12) & 0x03ff) + 1;
321 
322 		cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323 		cache->line = (ebx & 0x0fff) + 1;
324 
325 		cache->sizekb = (cache->assoc * part
326 				 * cache->line * sets) / 1024;
327 	      }
328 	  }
329 	default:
330 	  break;
331 	}
332     }
333 }
334 
335 /* Returns the description of caches for an Intel processor.  */
336 
337 static const char *
detect_caches_intel(bool xeon_mp,unsigned max_level,unsigned max_ext_level,unsigned * l2sizekb)338 detect_caches_intel (bool xeon_mp, unsigned max_level,
339 		     unsigned max_ext_level, unsigned *l2sizekb)
340 {
341   struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
342 
343   if (max_level >= 4)
344     detect_caches_cpuid4 (&level1, &level2, &level3);
345   else if (max_level >= 2)
346     detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347   else
348     return "";
349 
350   if (level1.sizekb == 0)
351     return "";
352 
353   /* Let the L3 replace the L2. This assumes inclusive caches
354      and single threaded program for now. */
355   if (level3.sizekb)
356     level2 = level3;
357 
358   /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
359      method if other methods fail to provide L2 cache parameters.  */
360   if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361     detect_l2_cache (&level2);
362 
363   *l2sizekb = level2.sizekb;
364 
365   return describe_cache (level1, level2);
366 }
367 
368 /* This will be called by the spec parser in gcc.c when it sees
369    a %:local_cpu_detect(args) construct.  Currently it will be called
370    with either "arch" or "tune" as argument depending on if -march=native
371    or -mtune=native is to be substituted.
372 
373    It returns a string containing new command line parameters to be
374    put at the place of the above two options, depending on what CPU
375    this is executed.  E.g. "-march=k8" on an AMD64 machine
376    for -march=native.
377 
378    ARGC and ARGV are set depending on the actual arguments given
379    in the spec.  */
380 
host_detect_local_cpu(int argc,const char ** argv)381 const char *host_detect_local_cpu (int argc, const char **argv)
382 {
383   enum processor_type processor = PROCESSOR_I386;
384   const char *cpu = "i386";
385 
386   const char *cache = "";
387   const char *options = "";
388 
389   unsigned int eax, ebx, ecx, edx;
390 
391   unsigned int max_level, ext_level;
392 
393   unsigned int vendor;
394   unsigned int model, family;
395 
396   unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397   unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398 
399   /* Extended features */
400   unsigned int has_lahf_lm = 0, has_sse4a = 0;
401   unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402   unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406   unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407   unsigned int has_hle = 0, has_rtm = 0;
408   unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409   unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410   unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411   unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412   unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413   unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414   unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415   unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416   unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0;
417 
418   bool arch;
419 
420   unsigned int l2sizekb = 0;
421 
422   if (argc < 1)
423     return NULL;
424 
425   arch = !strcmp (argv[0], "arch");
426 
427   if (!arch && strcmp (argv[0], "tune"))
428     return NULL;
429 
430   max_level = __get_cpuid_max (0, &vendor);
431   if (max_level < 1)
432     goto done;
433 
434   __cpuid (1, eax, ebx, ecx, edx);
435 
436   model = (eax >> 4) & 0x0f;
437   family = (eax >> 8) & 0x0f;
438   if (vendor == signature_INTEL_ebx
439       || vendor == signature_AMD_ebx)
440     {
441       unsigned int extended_model, extended_family;
442 
443       extended_model = (eax >> 12) & 0xf0;
444       extended_family = (eax >> 20) & 0xff;
445       if (family == 0x0f)
446 	{
447 	  family += extended_family;
448 	  model += extended_model;
449 	}
450       else if (family == 0x06)
451 	model += extended_model;
452     }
453 
454   has_sse3 = ecx & bit_SSE3;
455   has_ssse3 = ecx & bit_SSSE3;
456   has_sse4_1 = ecx & bit_SSE4_1;
457   has_sse4_2 = ecx & bit_SSE4_2;
458   has_avx = ecx & bit_AVX;
459   has_osxsave = ecx & bit_OSXSAVE;
460   has_cmpxchg16b = ecx & bit_CMPXCHG16B;
461   has_movbe = ecx & bit_MOVBE;
462   has_popcnt = ecx & bit_POPCNT;
463   has_aes = ecx & bit_AES;
464   has_pclmul = ecx & bit_PCLMUL;
465   has_fma = ecx & bit_FMA;
466   has_f16c = ecx & bit_F16C;
467   has_rdrnd = ecx & bit_RDRND;
468   has_xsave = ecx & bit_XSAVE;
469 
470   has_cmpxchg8b = edx & bit_CMPXCHG8B;
471   has_cmov = edx & bit_CMOV;
472   has_mmx = edx & bit_MMX;
473   has_fxsr = edx & bit_FXSAVE;
474   has_sse = edx & bit_SSE;
475   has_sse2 = edx & bit_SSE2;
476 
477   if (max_level >= 7)
478     {
479       __cpuid_count (7, 0, eax, ebx, ecx, edx);
480 
481       has_bmi = ebx & bit_BMI;
482       has_hle = ebx & bit_HLE;
483       has_rtm = ebx & bit_RTM;
484       has_avx2 = ebx & bit_AVX2;
485       has_bmi2 = ebx & bit_BMI2;
486       has_fsgsbase = ebx & bit_FSGSBASE;
487       has_rdseed = ebx & bit_RDSEED;
488       has_adx = ebx & bit_ADX;
489       has_avx512f = ebx & bit_AVX512F;
490       has_avx512er = ebx & bit_AVX512ER;
491       has_avx512pf = ebx & bit_AVX512PF;
492       has_avx512cd = ebx & bit_AVX512CD;
493       has_sha = ebx & bit_SHA;
494       has_clflushopt = ebx & bit_CLFLUSHOPT;
495       has_clwb = ebx & bit_CLWB;
496       has_avx512dq = ebx & bit_AVX512DQ;
497       has_avx512bw = ebx & bit_AVX512BW;
498       has_avx512vl = ebx & bit_AVX512VL;
499       has_avx512vl = ebx & bit_AVX512IFMA;
500 
501       has_prefetchwt1 = ecx & bit_PREFETCHWT1;
502       has_avx512vbmi = ecx & bit_AVX512VBMI;
503       has_pku = ecx & bit_OSPKE;
504     }
505 
506   if (max_level >= 13)
507     {
508       __cpuid_count (13, 1, eax, ebx, ecx, edx);
509 
510       has_xsaveopt = eax & bit_XSAVEOPT;
511       has_xsavec = eax & bit_XSAVEC;
512       has_xsaves = eax & bit_XSAVES;
513     }
514 
515   /* Check cpuid level of extended features.  */
516   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
517 
518   if (ext_level >= 0x80000001)
519     {
520       __cpuid (0x80000001, eax, ebx, ecx, edx);
521 
522       has_lahf_lm = ecx & bit_LAHF_LM;
523       has_sse4a = ecx & bit_SSE4a;
524       has_abm = ecx & bit_ABM;
525       has_lwp = ecx & bit_LWP;
526       has_fma4 = ecx & bit_FMA4;
527       has_xop = ecx & bit_XOP;
528       has_tbm = ecx & bit_TBM;
529       has_lzcnt = ecx & bit_LZCNT;
530       has_prfchw = ecx & bit_PRFCHW;
531 
532       has_longmode = edx & bit_LM;
533       has_3dnowp = edx & bit_3DNOWP;
534       has_3dnow = edx & bit_3DNOW;
535       has_mwaitx = ecx & bit_MWAITX;
536     }
537 
538   if (ext_level >= 0x80000008)
539     {
540       __cpuid (0x80000008, eax, ebx, ecx, edx);
541       has_clzero = ebx & bit_CLZERO;
542     }
543 
544   /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
545 #define XCR_XFEATURE_ENABLED_MASK	0x0
546 #define XSTATE_FP			0x1
547 #define XSTATE_SSE			0x2
548 #define XSTATE_YMM			0x4
549 #define XSTATE_OPMASK			0x20
550 #define XSTATE_ZMM			0x40
551 #define XSTATE_HI_ZMM			0x80
552   if (has_osxsave)
553     asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
554 	 : "=a" (eax), "=d" (edx)
555 	 : "c" (XCR_XFEATURE_ENABLED_MASK));
556 
557   /* Check if SSE and YMM states are supported.  */
558   if (!has_osxsave
559       || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM))
560     {
561       has_avx = 0;
562       has_avx2 = 0;
563       has_fma = 0;
564       has_fma4 = 0;
565       has_f16c = 0;
566       has_xop = 0;
567       has_xsave = 0;
568       has_xsaveopt = 0;
569       has_xsaves = 0;
570       has_xsavec = 0;
571     }
572 
573   if (!has_osxsave
574       || (eax &
575 	  (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
576 	  != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
577     {
578       has_avx512f = 0;
579       has_avx512er = 0;
580       has_avx512pf = 0;
581       has_avx512cd = 0;
582       has_avx512dq = 0;
583       has_avx512bw = 0;
584       has_avx512vl = 0;
585     }
586 
587   if (!arch)
588     {
589       if (vendor == signature_AMD_ebx
590 	  || vendor == signature_CENTAUR_ebx
591 	  || vendor == signature_CYRIX_ebx
592 	  || vendor == signature_NSC_ebx)
593 	cache = detect_caches_amd (ext_level);
594       else if (vendor == signature_INTEL_ebx)
595 	{
596 	  bool xeon_mp = (family == 15 && model == 6);
597 	  cache = detect_caches_intel (xeon_mp, max_level,
598 				       ext_level, &l2sizekb);
599 	}
600     }
601 
602   if (vendor == signature_AMD_ebx)
603     {
604       unsigned int name;
605 
606       /* Detect geode processor by its processor signature.  */
607       if (ext_level >= 0x80000002)
608 	__cpuid (0x80000002, name, ebx, ecx, edx);
609       else
610 	name = 0;
611 
612       if (name == signature_NSC_ebx)
613 	processor = PROCESSOR_GEODE;
614       else if (has_movbe && family == 22)
615 	processor = PROCESSOR_BTVER2;
616       else if (has_clzero)
617 	processor = PROCESSOR_ZNVER1;
618       else if (has_avx2)
619         processor = PROCESSOR_BDVER4;
620       else if (has_xsaveopt)
621         processor = PROCESSOR_BDVER3;
622       else if (has_bmi)
623         processor = PROCESSOR_BDVER2;
624       else if (has_xop)
625 	processor = PROCESSOR_BDVER1;
626       else if (has_sse4a && has_ssse3)
627         processor = PROCESSOR_BTVER1;
628       else if (has_sse4a)
629 	processor = PROCESSOR_AMDFAM10;
630       else if (has_sse2 || has_longmode)
631 	processor = PROCESSOR_K8;
632       else if (has_3dnowp && family == 6)
633 	processor = PROCESSOR_ATHLON;
634       else if (has_mmx)
635 	processor = PROCESSOR_K6;
636       else
637 	processor = PROCESSOR_PENTIUM;
638     }
639   else if (vendor == signature_CENTAUR_ebx)
640     {
641       processor = PROCESSOR_GENERIC;
642 
643       switch (family)
644 	{
645 	default:
646 	  /* We have no idea.  */
647 	  break;
648 
649 	case 5:
650 	  if (has_3dnow || has_mmx)
651 	    processor = PROCESSOR_I486;
652 	  break;
653 
654 	case 6:
655 	  if (model > 9 || has_longmode)
656 	    /* Use the default detection procedure.  */
657 	    ;
658 	  else if (model == 9)
659 	    processor = PROCESSOR_PENTIUMPRO;
660 	  else if (model >= 6)
661 	    processor = PROCESSOR_I486;
662 	}
663     }
664   else
665     {
666       switch (family)
667 	{
668 	case 4:
669 	  processor = PROCESSOR_I486;
670 	  break;
671 	case 5:
672 	  processor = PROCESSOR_PENTIUM;
673 	  break;
674 	case 6:
675 	  processor = PROCESSOR_PENTIUMPRO;
676 	  break;
677 	case 15:
678 	  processor = PROCESSOR_PENTIUM4;
679 	  break;
680 	default:
681 	  /* We have no idea.  */
682 	  processor = PROCESSOR_GENERIC;
683 	}
684     }
685 
686   switch (processor)
687     {
688     case PROCESSOR_I386:
689       /* Default.  */
690       break;
691     case PROCESSOR_I486:
692       if (arch && vendor == signature_CENTAUR_ebx)
693 	{
694 	  if (model >= 6)
695 	    cpu = "c3";
696 	  else if (has_3dnow)
697 	    cpu = "winchip2";
698 	  else
699 	    /* Assume WinChip C6.  */
700 	    cpu = "winchip-c6";
701 	}
702       else
703 	cpu = "i486";
704       break;
705     case PROCESSOR_PENTIUM:
706       if (arch && has_mmx)
707 	cpu = "pentium-mmx";
708       else
709 	cpu = "pentium";
710       break;
711     case PROCESSOR_PENTIUMPRO:
712       switch (model)
713 	{
714 	case 0x1c:
715 	case 0x26:
716 	  /* Bonnell.  */
717 	  cpu = "bonnell";
718 	  break;
719 	case 0x37:
720 	case 0x4a:
721 	case 0x4d:
722 	case 0x5a:
723 	case 0x5d:
724 	  /* Silvermont.  */
725 	  cpu = "silvermont";
726 	  break;
727 	case 0x0f:
728 	  /* Merom.  */
729 	case 0x17:
730 	case 0x1d:
731 	  /* Penryn.  */
732 	  cpu = "core2";
733 	  break;
734 	case 0x1a:
735 	case 0x1e:
736 	case 0x1f:
737 	case 0x2e:
738 	  /* Nehalem.  */
739 	  cpu = "nehalem";
740 	  break;
741 	case 0x25:
742 	case 0x2c:
743 	case 0x2f:
744 	  /* Westmere.  */
745 	  cpu = "westmere";
746 	  break;
747 	case 0x2a:
748 	case 0x2d:
749 	  /* Sandy Bridge.  */
750 	  cpu = "sandybridge";
751 	  break;
752 	case 0x3a:
753 	case 0x3e:
754 	  /* Ivy Bridge.  */
755 	  cpu = "ivybridge";
756 	  break;
757 	case 0x3c:
758 	case 0x3f:
759 	case 0x45:
760 	case 0x46:
761 	  /* Haswell.  */
762 	  cpu = "haswell";
763 	  break;
764 	case 0x3d:
765 	case 0x47:
766 	case 0x4f:
767 	case 0x56:
768 	  /* Broadwell.  */
769 	  cpu = "broadwell";
770 	  break;
771 	case 0x4e:
772 	case 0x5e:
773 	  /* Skylake.  */
774 	  cpu = "skylake";
775 	  break;
776 	case 0x57:
777 	  /* Knights Landing.  */
778 	  cpu = "knl";
779 	  break;
780 	default:
781 	  if (arch)
782 	    {
783 	      /* This is unknown family 0x6 CPU.  */
784 	      /* Assume Knights Landing.  */
785 	      if (has_avx512f)
786 		cpu = "knl";
787 	      /* Assume Broadwell.  */
788 	      else if (has_adx)
789 		cpu = "broadwell";
790 	      else if (has_avx2)
791 		/* Assume Haswell.  */
792 		cpu = "haswell";
793 	      else if (has_avx)
794 		/* Assume Sandy Bridge.  */
795 		cpu = "sandybridge";
796 	      else if (has_sse4_2)
797 		{
798 		  if (has_movbe)
799 		    /* Assume Silvermont.  */
800 		    cpu = "silvermont";
801 		  else
802 		    /* Assume Nehalem.  */
803 		    cpu = "nehalem";
804 		}
805 	      else if (has_ssse3)
806 		{
807 		  if (has_movbe)
808 		    /* Assume Bonnell.  */
809 		    cpu = "bonnell";
810 		  else
811 		    /* Assume Core 2.  */
812 		    cpu = "core2";
813 		}
814 	      else if (has_longmode)
815 		/* Perhaps some emulator?  Assume x86-64, otherwise gcc
816 		   -march=native would be unusable for 64-bit compilations,
817 		   as all the CPUs below are 32-bit only.  */
818 		cpu = "x86-64";
819 	      else if (has_sse3)
820 		/* It is Core Duo.  */
821 		cpu = "pentium-m";
822 	      else if (has_sse2)
823 		/* It is Pentium M.  */
824 		cpu = "pentium-m";
825 	      else if (has_sse)
826 		{
827 		  if (vendor == signature_CENTAUR_ebx)
828 		    cpu = "c3-2";
829 		  else
830 		    /* It is Pentium III.  */
831 		    cpu = "pentium3";
832 		}
833 	      else if (has_mmx)
834 		/* It is Pentium II.  */
835 		cpu = "pentium2";
836 	      else
837 		/* Default to Pentium Pro.  */
838 		cpu = "pentiumpro";
839 	    }
840 	  else
841 	    /* For -mtune, we default to -mtune=generic.  */
842 	    cpu = "generic";
843 	  break;
844 	}
845       break;
846     case PROCESSOR_PENTIUM4:
847       if (has_sse3)
848 	{
849 	  if (has_longmode)
850 	    cpu = "nocona";
851 	  else
852 	    cpu = "prescott";
853 	}
854       else
855 	cpu = "pentium4";
856       break;
857     case PROCESSOR_GEODE:
858       cpu = "geode";
859       break;
860     case PROCESSOR_K6:
861       if (arch && has_3dnow)
862 	cpu = "k6-3";
863       else
864 	cpu = "k6";
865       break;
866     case PROCESSOR_ATHLON:
867       if (arch && has_sse)
868 	cpu = "athlon-4";
869       else
870 	cpu = "athlon";
871       break;
872     case PROCESSOR_K8:
873       if (arch && has_sse3)
874 	cpu = "k8-sse3";
875       else
876 	cpu = "k8";
877       break;
878     case PROCESSOR_AMDFAM10:
879       cpu = "amdfam10";
880       break;
881     case PROCESSOR_BDVER1:
882       cpu = "bdver1";
883       break;
884     case PROCESSOR_BDVER2:
885       cpu = "bdver2";
886       break;
887     case PROCESSOR_BDVER3:
888       cpu = "bdver3";
889       break;
890     case PROCESSOR_BDVER4:
891       cpu = "bdver4";
892       break;
893     case PROCESSOR_ZNVER1:
894       cpu = "znver1";
895       break;
896     case PROCESSOR_BTVER1:
897       cpu = "btver1";
898       break;
899     case PROCESSOR_BTVER2:
900       cpu = "btver2";
901       break;
902 
903     default:
904       /* Use something reasonable.  */
905       if (arch)
906 	{
907 	  if (has_ssse3)
908 	    cpu = "core2";
909 	  else if (has_sse3)
910 	    {
911 	      if (has_longmode)
912 		cpu = "nocona";
913 	      else
914 		cpu = "prescott";
915 	    }
916 	  else if (has_longmode)
917 	    /* Perhaps some emulator?  Assume x86-64, otherwise gcc
918 	       -march=native would be unusable for 64-bit compilations,
919 	       as all the CPUs below are 32-bit only.  */
920 	    cpu = "x86-64";
921 	  else if (has_sse2)
922 	    cpu = "pentium4";
923 	  else if (has_cmov)
924 	    cpu = "pentiumpro";
925 	  else if (has_mmx)
926 	    cpu = "pentium-mmx";
927 	  else if (has_cmpxchg8b)
928 	    cpu = "pentium";
929 	}
930       else
931 	cpu = "generic";
932     }
933 
934   if (arch)
935     {
936       const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
937       const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
938       const char *sse = has_sse ? " -msse" : " -mno-sse";
939       const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
940       const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
941       const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
942       const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
943       const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
944       const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
945       const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
946       const char *aes = has_aes ? " -maes" : " -mno-aes";
947       const char *sha = has_sha ? " -msha" : " -mno-sha";
948       const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
949       const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
950       const char *abm = has_abm ? " -mabm" : " -mno-abm";
951       const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
952       const char *fma = has_fma ? " -mfma" : " -mno-fma";
953       const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
954       const char *xop = has_xop ? " -mxop" : " -mno-xop";
955       const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
956       const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
957       const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
958       const char *avx = has_avx ? " -mavx" : " -mno-avx";
959       const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
960       const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
961       const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
962       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
963       const char *hle = has_hle ? " -mhle" : " -mno-hle";
964       const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
965       const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
966       const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
967       const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
968       const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
969       const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
970       const char *adx = has_adx ? " -madx" : " -mno-adx";
971       const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
972       const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
973       const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
974       const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
975       const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
976       const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
977       const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
978       const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
979       const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
980       const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
981       const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
982       const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
983       const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
984       const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
985       const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
986       const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
987       const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
988       const char *mwaitx  = has_mwaitx  ? " -mmwaitx"  : " -mno-mwaitx";
989       const char *clzero  = has_clzero  ? " -mclzero"  : " -mno-clzero";
990       const char *pku = has_pku ? " -mpku" : " -mno-pku";
991       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
992 			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
993 			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
994 			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
995 			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
996 			fxsr, xsave, xsaveopt, avx512f, avx512er,
997 			avx512cd, avx512pf, prefetchwt1, clflushopt,
998 			xsavec, xsaves, avx512dq, avx512bw, avx512vl,
999 			avx512ifma, avx512vbmi, clwb, mwaitx, clzero,
1000 			pku, NULL);
1001     }
1002 
1003 done:
1004   return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1005 }
1006 #else
1007 
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   to the newly built compiler to generate code for its default target.

   In this configuration no detection is attempted: ARGC and ARGV are
   ignored and NULL is always returned.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;

  return NULL;
}
1016 #endif /* __GNUC__ */
1017