1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24
25 const char *host_detect_local_cpu (int argc, const char **argv);
26
27 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28 #include "cpuid.h"
29
/* Geometry of one processor cache.  The three members are kept in this
   order because the file initializes the structure positionally.  */

struct cache_desc
{
  unsigned int sizekb;  /* Total capacity, in kilobytes.  */
  unsigned int assoc;   /* Associativity (number of ways).  */
  unsigned int line;    /* Cache line size, in bytes.  */
};
36
37 /* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
39
40 static char *
describe_cache(struct cache_desc level1,struct cache_desc level2)41 describe_cache (struct cache_desc level1, struct cache_desc level2)
42 {
43 char size[100], line[100], size2[100];
44
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
47
48 snprintf (size, sizeof (size),
49 "--param l1-cache-size=%u ", level1.sizekb);
50 snprintf (line, sizeof (line),
51 "--param l1-cache-line-size=%u ", level1.line);
52
53 snprintf (size2, sizeof (size2),
54 "--param l2-cache-size=%u ", level2.sizekb);
55
56 return concat (size, line, size2, NULL);
57 }
58
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
60
61 static void
detect_l2_cache(struct cache_desc * level2)62 detect_l2_cache (struct cache_desc *level2)
63 {
64 unsigned eax, ebx, ecx, edx;
65 unsigned assoc;
66
67 __cpuid (0x80000006, eax, ebx, ecx, edx);
68
69 level2->sizekb = (ecx >> 16) & 0xffff;
70 level2->line = ecx & 0xff;
71
72 assoc = (ecx >> 12) & 0xf;
73 if (assoc == 6)
74 assoc = 8;
75 else if (assoc == 8)
76 assoc = 16;
77 else if (assoc >= 0xa && assoc <= 0xc)
78 assoc = 32 + (assoc - 0xa) * 16;
79 else if (assoc >= 0xd && assoc <= 0xe)
80 assoc = 96 + (assoc - 0xd) * 32;
81
82 level2->assoc = assoc;
83 }
84
85 /* Returns the description of caches for an AMD processor. */
86
87 static const char *
detect_caches_amd(unsigned max_ext_level)88 detect_caches_amd (unsigned max_ext_level)
89 {
90 unsigned eax, ebx, ecx, edx;
91
92 struct cache_desc level1, level2 = {0, 0, 0};
93
94 if (max_ext_level < 0x80000005)
95 return "";
96
97 __cpuid (0x80000005, eax, ebx, ecx, edx);
98
99 level1.sizekb = (ecx >> 24) & 0xff;
100 level1.assoc = (ecx >> 16) & 0xff;
101 level1.line = ecx & 0xff;
102
103 if (max_ext_level >= 0x80000006)
104 detect_l2_cache (&level2);
105
106 return describe_cache (level1, level2);
107 }
108
109 /* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
113
114 static void
decode_caches_intel(unsigned reg,bool xeon_mp,struct cache_desc * level1,struct cache_desc * level2)115 decode_caches_intel (unsigned reg, bool xeon_mp,
116 struct cache_desc *level1, struct cache_desc *level2)
117 {
118 int i;
119
120 for (i = 24; i >= 0; i -= 8)
121 switch ((reg >> i) & 0xff)
122 {
123 case 0x0a:
124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 break;
126 case 0x0c:
127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 break;
129 case 0x0d:
130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 break;
132 case 0x0e:
133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 break;
135 case 0x21:
136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 break;
138 case 0x24:
139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 break;
141 case 0x2c:
142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 break;
144 case 0x39:
145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 break;
147 case 0x3a:
148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 break;
150 case 0x3b:
151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 break;
153 case 0x3c:
154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 break;
156 case 0x3d:
157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 break;
159 case 0x3e:
160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 break;
162 case 0x41:
163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 break;
165 case 0x42:
166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 break;
168 case 0x43:
169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 break;
171 case 0x44:
172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 break;
174 case 0x45:
175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 break;
177 case 0x48:
178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 break;
180 case 0x49:
181 if (xeon_mp)
182 break;
183 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 break;
185 case 0x4e:
186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 break;
188 case 0x60:
189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 break;
191 case 0x66:
192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 break;
194 case 0x67:
195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 break;
197 case 0x68:
198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 break;
200 case 0x78:
201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 break;
203 case 0x79:
204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 break;
206 case 0x7a:
207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 break;
209 case 0x7b:
210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 break;
212 case 0x7c:
213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 break;
215 case 0x7d:
216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 break;
218 case 0x7f:
219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 break;
221 case 0x80:
222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 break;
224 case 0x82:
225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 break;
227 case 0x83:
228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 break;
230 case 0x84:
231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 break;
233 case 0x85:
234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 break;
236 case 0x86:
237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 break;
239 case 0x87:
240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241
242 default:
243 break;
244 }
245 }
246
/* Detect cache parameters using CPUID function 2.  Every register
   returned by the function that is non-zero and has bit 31 clear is
   handed to decode_caches_intel, which updates LEVEL1 and LEVEL2.
   XEON_MP is passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low four bits of EAX are used as the number of rounds to run;
     they are cleared so they are not decoded as part of a
     descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          if (regs[r] == 0 || (regs[r] & 0x80000000u) != 0)
            continue;

          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Query again only if another round is still pending.  */
      if (remaining > 1)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
271
/* Cache type codes used when detecting cache parameters with CPUID
   function 4.  This method doesn't require hardcoded tables.
   CACHE_END is zero; the remaining codes follow consecutively.  */

enum cache_type
{
  CACHE_END,
  CACHE_DATA,
  CACHE_INST,
  CACHE_UNIFIED
};
282
283 static void
detect_caches_cpuid4(struct cache_desc * level1,struct cache_desc * level2,struct cache_desc * level3)284 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 struct cache_desc *level3)
286 {
287 struct cache_desc *cache;
288
289 unsigned eax, ebx, ecx, edx;
290 int count;
291
292 for (count = 0;; count++)
293 {
294 __cpuid_count(4, count, eax, ebx, ecx, edx);
295 switch (eax & 0x1f)
296 {
297 case CACHE_END:
298 return;
299 case CACHE_DATA:
300 case CACHE_UNIFIED:
301 {
302 switch ((eax >> 5) & 0x07)
303 {
304 case 1:
305 cache = level1;
306 break;
307 case 2:
308 cache = level2;
309 break;
310 case 3:
311 cache = level3;
312 break;
313 default:
314 cache = NULL;
315 }
316
317 if (cache)
318 {
319 unsigned sets = ecx + 1;
320 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
321
322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323 cache->line = (ebx & 0x0fff) + 1;
324
325 cache->sizekb = (cache->assoc * part
326 * cache->line * sets) / 1024;
327 }
328 }
329 default:
330 break;
331 }
332 }
333 }
334
335 /* Returns the description of caches for an Intel processor. */
336
337 static const char *
detect_caches_intel(bool xeon_mp,unsigned max_level,unsigned max_ext_level,unsigned * l2sizekb)338 detect_caches_intel (bool xeon_mp, unsigned max_level,
339 unsigned max_ext_level, unsigned *l2sizekb)
340 {
341 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
342
343 if (max_level >= 4)
344 detect_caches_cpuid4 (&level1, &level2, &level3);
345 else if (max_level >= 2)
346 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347 else
348 return "";
349
350 if (level1.sizekb == 0)
351 return "";
352
353 /* Let the L3 replace the L2. This assumes inclusive caches
354 and single threaded program for now. */
355 if (level3.sizekb)
356 level2 = level3;
357
358 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
359 method if other methods fail to provide L2 cache parameters. */
360 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361 detect_l2_cache (&level2);
362
363 *l2sizekb = level2.sizekb;
364
365 return describe_cache (level1, level2);
366 }
367
368 /* This will be called by the spec parser in gcc.c when it sees
369 a %:local_cpu_detect(args) construct. Currently it will be called
370 with either "arch" or "tune" as argument depending on if -march=native
371 or -mtune=native is to be substituted.
372
373 It returns a string containing new command line parameters to be
374 put at the place of the above two options, depending on what CPU
375 this is executed. E.g. "-march=k8" on an AMD64 machine
376 for -march=native.
377
378 ARGC and ARGV are set depending on the actual arguments given
379 in the spec. */
380
host_detect_local_cpu(int argc,const char ** argv)381 const char *host_detect_local_cpu (int argc, const char **argv)
382 {
383 enum processor_type processor = PROCESSOR_I386;
384 const char *cpu = "i386";
385
386 const char *cache = "";
387 const char *options = "";
388
389 unsigned int eax, ebx, ecx, edx;
390
391 unsigned int max_level, ext_level;
392
393 unsigned int vendor;
394 unsigned int model, family;
395
396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398
399 /* Extended features */
400 unsigned int has_lahf_lm = 0, has_sse4a = 0;
401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407 unsigned int has_hle = 0, has_rtm = 0;
408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0;
417
418 bool arch;
419
420 unsigned int l2sizekb = 0;
421
422 if (argc < 1)
423 return NULL;
424
425 arch = !strcmp (argv[0], "arch");
426
427 if (!arch && strcmp (argv[0], "tune"))
428 return NULL;
429
430 max_level = __get_cpuid_max (0, &vendor);
431 if (max_level < 1)
432 goto done;
433
434 __cpuid (1, eax, ebx, ecx, edx);
435
436 model = (eax >> 4) & 0x0f;
437 family = (eax >> 8) & 0x0f;
438 if (vendor == signature_INTEL_ebx
439 || vendor == signature_AMD_ebx)
440 {
441 unsigned int extended_model, extended_family;
442
443 extended_model = (eax >> 12) & 0xf0;
444 extended_family = (eax >> 20) & 0xff;
445 if (family == 0x0f)
446 {
447 family += extended_family;
448 model += extended_model;
449 }
450 else if (family == 0x06)
451 model += extended_model;
452 }
453
454 has_sse3 = ecx & bit_SSE3;
455 has_ssse3 = ecx & bit_SSSE3;
456 has_sse4_1 = ecx & bit_SSE4_1;
457 has_sse4_2 = ecx & bit_SSE4_2;
458 has_avx = ecx & bit_AVX;
459 has_osxsave = ecx & bit_OSXSAVE;
460 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
461 has_movbe = ecx & bit_MOVBE;
462 has_popcnt = ecx & bit_POPCNT;
463 has_aes = ecx & bit_AES;
464 has_pclmul = ecx & bit_PCLMUL;
465 has_fma = ecx & bit_FMA;
466 has_f16c = ecx & bit_F16C;
467 has_rdrnd = ecx & bit_RDRND;
468 has_xsave = ecx & bit_XSAVE;
469
470 has_cmpxchg8b = edx & bit_CMPXCHG8B;
471 has_cmov = edx & bit_CMOV;
472 has_mmx = edx & bit_MMX;
473 has_fxsr = edx & bit_FXSAVE;
474 has_sse = edx & bit_SSE;
475 has_sse2 = edx & bit_SSE2;
476
477 if (max_level >= 7)
478 {
479 __cpuid_count (7, 0, eax, ebx, ecx, edx);
480
481 has_bmi = ebx & bit_BMI;
482 has_hle = ebx & bit_HLE;
483 has_rtm = ebx & bit_RTM;
484 has_avx2 = ebx & bit_AVX2;
485 has_bmi2 = ebx & bit_BMI2;
486 has_fsgsbase = ebx & bit_FSGSBASE;
487 has_rdseed = ebx & bit_RDSEED;
488 has_adx = ebx & bit_ADX;
489 has_avx512f = ebx & bit_AVX512F;
490 has_avx512er = ebx & bit_AVX512ER;
491 has_avx512pf = ebx & bit_AVX512PF;
492 has_avx512cd = ebx & bit_AVX512CD;
493 has_sha = ebx & bit_SHA;
494 has_clflushopt = ebx & bit_CLFLUSHOPT;
495 has_clwb = ebx & bit_CLWB;
496 has_avx512dq = ebx & bit_AVX512DQ;
497 has_avx512bw = ebx & bit_AVX512BW;
498 has_avx512vl = ebx & bit_AVX512VL;
499 has_avx512vl = ebx & bit_AVX512IFMA;
500
501 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
502 has_avx512vbmi = ecx & bit_AVX512VBMI;
503 has_pku = ecx & bit_OSPKE;
504 }
505
506 if (max_level >= 13)
507 {
508 __cpuid_count (13, 1, eax, ebx, ecx, edx);
509
510 has_xsaveopt = eax & bit_XSAVEOPT;
511 has_xsavec = eax & bit_XSAVEC;
512 has_xsaves = eax & bit_XSAVES;
513 }
514
515 /* Check cpuid level of extended features. */
516 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
517
518 if (ext_level >= 0x80000001)
519 {
520 __cpuid (0x80000001, eax, ebx, ecx, edx);
521
522 has_lahf_lm = ecx & bit_LAHF_LM;
523 has_sse4a = ecx & bit_SSE4a;
524 has_abm = ecx & bit_ABM;
525 has_lwp = ecx & bit_LWP;
526 has_fma4 = ecx & bit_FMA4;
527 has_xop = ecx & bit_XOP;
528 has_tbm = ecx & bit_TBM;
529 has_lzcnt = ecx & bit_LZCNT;
530 has_prfchw = ecx & bit_PRFCHW;
531
532 has_longmode = edx & bit_LM;
533 has_3dnowp = edx & bit_3DNOWP;
534 has_3dnow = edx & bit_3DNOW;
535 has_mwaitx = ecx & bit_MWAITX;
536 }
537
538 if (ext_level >= 0x80000008)
539 {
540 __cpuid (0x80000008, eax, ebx, ecx, edx);
541 has_clzero = ebx & bit_CLZERO;
542 }
543
544 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
545 #define XCR_XFEATURE_ENABLED_MASK 0x0
546 #define XSTATE_FP 0x1
547 #define XSTATE_SSE 0x2
548 #define XSTATE_YMM 0x4
549 #define XSTATE_OPMASK 0x20
550 #define XSTATE_ZMM 0x40
551 #define XSTATE_HI_ZMM 0x80
552 if (has_osxsave)
553 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
554 : "=a" (eax), "=d" (edx)
555 : "c" (XCR_XFEATURE_ENABLED_MASK));
556
557 /* Check if SSE and YMM states are supported. */
558 if (!has_osxsave
559 || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM))
560 {
561 has_avx = 0;
562 has_avx2 = 0;
563 has_fma = 0;
564 has_fma4 = 0;
565 has_f16c = 0;
566 has_xop = 0;
567 has_xsave = 0;
568 has_xsaveopt = 0;
569 has_xsaves = 0;
570 has_xsavec = 0;
571 }
572
573 if (!has_osxsave
574 || (eax &
575 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
576 != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
577 {
578 has_avx512f = 0;
579 has_avx512er = 0;
580 has_avx512pf = 0;
581 has_avx512cd = 0;
582 has_avx512dq = 0;
583 has_avx512bw = 0;
584 has_avx512vl = 0;
585 }
586
587 if (!arch)
588 {
589 if (vendor == signature_AMD_ebx
590 || vendor == signature_CENTAUR_ebx
591 || vendor == signature_CYRIX_ebx
592 || vendor == signature_NSC_ebx)
593 cache = detect_caches_amd (ext_level);
594 else if (vendor == signature_INTEL_ebx)
595 {
596 bool xeon_mp = (family == 15 && model == 6);
597 cache = detect_caches_intel (xeon_mp, max_level,
598 ext_level, &l2sizekb);
599 }
600 }
601
602 if (vendor == signature_AMD_ebx)
603 {
604 unsigned int name;
605
606 /* Detect geode processor by its processor signature. */
607 if (ext_level >= 0x80000002)
608 __cpuid (0x80000002, name, ebx, ecx, edx);
609 else
610 name = 0;
611
612 if (name == signature_NSC_ebx)
613 processor = PROCESSOR_GEODE;
614 else if (has_movbe && family == 22)
615 processor = PROCESSOR_BTVER2;
616 else if (has_clzero)
617 processor = PROCESSOR_ZNVER1;
618 else if (has_avx2)
619 processor = PROCESSOR_BDVER4;
620 else if (has_xsaveopt)
621 processor = PROCESSOR_BDVER3;
622 else if (has_bmi)
623 processor = PROCESSOR_BDVER2;
624 else if (has_xop)
625 processor = PROCESSOR_BDVER1;
626 else if (has_sse4a && has_ssse3)
627 processor = PROCESSOR_BTVER1;
628 else if (has_sse4a)
629 processor = PROCESSOR_AMDFAM10;
630 else if (has_sse2 || has_longmode)
631 processor = PROCESSOR_K8;
632 else if (has_3dnowp && family == 6)
633 processor = PROCESSOR_ATHLON;
634 else if (has_mmx)
635 processor = PROCESSOR_K6;
636 else
637 processor = PROCESSOR_PENTIUM;
638 }
639 else if (vendor == signature_CENTAUR_ebx)
640 {
641 processor = PROCESSOR_GENERIC;
642
643 switch (family)
644 {
645 default:
646 /* We have no idea. */
647 break;
648
649 case 5:
650 if (has_3dnow || has_mmx)
651 processor = PROCESSOR_I486;
652 break;
653
654 case 6:
655 if (model > 9 || has_longmode)
656 /* Use the default detection procedure. */
657 ;
658 else if (model == 9)
659 processor = PROCESSOR_PENTIUMPRO;
660 else if (model >= 6)
661 processor = PROCESSOR_I486;
662 }
663 }
664 else
665 {
666 switch (family)
667 {
668 case 4:
669 processor = PROCESSOR_I486;
670 break;
671 case 5:
672 processor = PROCESSOR_PENTIUM;
673 break;
674 case 6:
675 processor = PROCESSOR_PENTIUMPRO;
676 break;
677 case 15:
678 processor = PROCESSOR_PENTIUM4;
679 break;
680 default:
681 /* We have no idea. */
682 processor = PROCESSOR_GENERIC;
683 }
684 }
685
686 switch (processor)
687 {
688 case PROCESSOR_I386:
689 /* Default. */
690 break;
691 case PROCESSOR_I486:
692 if (arch && vendor == signature_CENTAUR_ebx)
693 {
694 if (model >= 6)
695 cpu = "c3";
696 else if (has_3dnow)
697 cpu = "winchip2";
698 else
699 /* Assume WinChip C6. */
700 cpu = "winchip-c6";
701 }
702 else
703 cpu = "i486";
704 break;
705 case PROCESSOR_PENTIUM:
706 if (arch && has_mmx)
707 cpu = "pentium-mmx";
708 else
709 cpu = "pentium";
710 break;
711 case PROCESSOR_PENTIUMPRO:
712 switch (model)
713 {
714 case 0x1c:
715 case 0x26:
716 /* Bonnell. */
717 cpu = "bonnell";
718 break;
719 case 0x37:
720 case 0x4a:
721 case 0x4d:
722 case 0x5a:
723 case 0x5d:
724 /* Silvermont. */
725 cpu = "silvermont";
726 break;
727 case 0x0f:
728 /* Merom. */
729 case 0x17:
730 case 0x1d:
731 /* Penryn. */
732 cpu = "core2";
733 break;
734 case 0x1a:
735 case 0x1e:
736 case 0x1f:
737 case 0x2e:
738 /* Nehalem. */
739 cpu = "nehalem";
740 break;
741 case 0x25:
742 case 0x2c:
743 case 0x2f:
744 /* Westmere. */
745 cpu = "westmere";
746 break;
747 case 0x2a:
748 case 0x2d:
749 /* Sandy Bridge. */
750 cpu = "sandybridge";
751 break;
752 case 0x3a:
753 case 0x3e:
754 /* Ivy Bridge. */
755 cpu = "ivybridge";
756 break;
757 case 0x3c:
758 case 0x3f:
759 case 0x45:
760 case 0x46:
761 /* Haswell. */
762 cpu = "haswell";
763 break;
764 case 0x3d:
765 case 0x47:
766 case 0x4f:
767 case 0x56:
768 /* Broadwell. */
769 cpu = "broadwell";
770 break;
771 case 0x4e:
772 case 0x5e:
773 /* Skylake. */
774 cpu = "skylake";
775 break;
776 case 0x57:
777 /* Knights Landing. */
778 cpu = "knl";
779 break;
780 default:
781 if (arch)
782 {
783 /* This is unknown family 0x6 CPU. */
784 /* Assume Knights Landing. */
785 if (has_avx512f)
786 cpu = "knl";
787 /* Assume Broadwell. */
788 else if (has_adx)
789 cpu = "broadwell";
790 else if (has_avx2)
791 /* Assume Haswell. */
792 cpu = "haswell";
793 else if (has_avx)
794 /* Assume Sandy Bridge. */
795 cpu = "sandybridge";
796 else if (has_sse4_2)
797 {
798 if (has_movbe)
799 /* Assume Silvermont. */
800 cpu = "silvermont";
801 else
802 /* Assume Nehalem. */
803 cpu = "nehalem";
804 }
805 else if (has_ssse3)
806 {
807 if (has_movbe)
808 /* Assume Bonnell. */
809 cpu = "bonnell";
810 else
811 /* Assume Core 2. */
812 cpu = "core2";
813 }
814 else if (has_longmode)
815 /* Perhaps some emulator? Assume x86-64, otherwise gcc
816 -march=native would be unusable for 64-bit compilations,
817 as all the CPUs below are 32-bit only. */
818 cpu = "x86-64";
819 else if (has_sse3)
820 /* It is Core Duo. */
821 cpu = "pentium-m";
822 else if (has_sse2)
823 /* It is Pentium M. */
824 cpu = "pentium-m";
825 else if (has_sse)
826 {
827 if (vendor == signature_CENTAUR_ebx)
828 cpu = "c3-2";
829 else
830 /* It is Pentium III. */
831 cpu = "pentium3";
832 }
833 else if (has_mmx)
834 /* It is Pentium II. */
835 cpu = "pentium2";
836 else
837 /* Default to Pentium Pro. */
838 cpu = "pentiumpro";
839 }
840 else
841 /* For -mtune, we default to -mtune=generic. */
842 cpu = "generic";
843 break;
844 }
845 break;
846 case PROCESSOR_PENTIUM4:
847 if (has_sse3)
848 {
849 if (has_longmode)
850 cpu = "nocona";
851 else
852 cpu = "prescott";
853 }
854 else
855 cpu = "pentium4";
856 break;
857 case PROCESSOR_GEODE:
858 cpu = "geode";
859 break;
860 case PROCESSOR_K6:
861 if (arch && has_3dnow)
862 cpu = "k6-3";
863 else
864 cpu = "k6";
865 break;
866 case PROCESSOR_ATHLON:
867 if (arch && has_sse)
868 cpu = "athlon-4";
869 else
870 cpu = "athlon";
871 break;
872 case PROCESSOR_K8:
873 if (arch && has_sse3)
874 cpu = "k8-sse3";
875 else
876 cpu = "k8";
877 break;
878 case PROCESSOR_AMDFAM10:
879 cpu = "amdfam10";
880 break;
881 case PROCESSOR_BDVER1:
882 cpu = "bdver1";
883 break;
884 case PROCESSOR_BDVER2:
885 cpu = "bdver2";
886 break;
887 case PROCESSOR_BDVER3:
888 cpu = "bdver3";
889 break;
890 case PROCESSOR_BDVER4:
891 cpu = "bdver4";
892 break;
893 case PROCESSOR_ZNVER1:
894 cpu = "znver1";
895 break;
896 case PROCESSOR_BTVER1:
897 cpu = "btver1";
898 break;
899 case PROCESSOR_BTVER2:
900 cpu = "btver2";
901 break;
902
903 default:
904 /* Use something reasonable. */
905 if (arch)
906 {
907 if (has_ssse3)
908 cpu = "core2";
909 else if (has_sse3)
910 {
911 if (has_longmode)
912 cpu = "nocona";
913 else
914 cpu = "prescott";
915 }
916 else if (has_longmode)
917 /* Perhaps some emulator? Assume x86-64, otherwise gcc
918 -march=native would be unusable for 64-bit compilations,
919 as all the CPUs below are 32-bit only. */
920 cpu = "x86-64";
921 else if (has_sse2)
922 cpu = "pentium4";
923 else if (has_cmov)
924 cpu = "pentiumpro";
925 else if (has_mmx)
926 cpu = "pentium-mmx";
927 else if (has_cmpxchg8b)
928 cpu = "pentium";
929 }
930 else
931 cpu = "generic";
932 }
933
934 if (arch)
935 {
936 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
937 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
938 const char *sse = has_sse ? " -msse" : " -mno-sse";
939 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
940 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
941 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
942 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
943 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
944 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
945 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
946 const char *aes = has_aes ? " -maes" : " -mno-aes";
947 const char *sha = has_sha ? " -msha" : " -mno-sha";
948 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
949 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
950 const char *abm = has_abm ? " -mabm" : " -mno-abm";
951 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
952 const char *fma = has_fma ? " -mfma" : " -mno-fma";
953 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
954 const char *xop = has_xop ? " -mxop" : " -mno-xop";
955 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
956 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
957 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
958 const char *avx = has_avx ? " -mavx" : " -mno-avx";
959 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
960 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
961 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
962 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
963 const char *hle = has_hle ? " -mhle" : " -mno-hle";
964 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
965 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
966 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
967 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
968 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
969 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
970 const char *adx = has_adx ? " -madx" : " -mno-adx";
971 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
972 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
973 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
974 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
975 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
976 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
977 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
978 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
979 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
980 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
981 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
982 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
983 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
984 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
985 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
986 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
987 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
988 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
989 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
990 const char *pku = has_pku ? " -mpku" : " -mno-pku";
991 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
992 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
993 popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
994 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
995 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
996 fxsr, xsave, xsaveopt, avx512f, avx512er,
997 avx512cd, avx512pf, prefetchwt1, clflushopt,
998 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
999 avx512ifma, avx512vbmi, clwb, mwaitx, clzero,
1000 pku, NULL);
1001 }
1002
1003 done:
1004 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1005 }
1006 #else
1007
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   to the newly built compiler to generate code for its default target.
   This variant therefore performs no detection: both arguments are
   ignored and NULL is always returned.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  /* Neither argument is consulted.  */
  (void) argc;
  (void) argv;

  return NULL;
}
1016 #endif /* __GNUC__ */
1017