/* Subroutines for the gcc driver.
   Copyright (C) 2006-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
1910d565efSmrg
20c7a68eb7Smrg #define IN_TARGET_CODE 1
21c7a68eb7Smrg
2210d565efSmrg #include "config.h"
2310d565efSmrg #include "system.h"
2410d565efSmrg #include "coretypes.h"
2510d565efSmrg #include "tm.h"
2610d565efSmrg
2710d565efSmrg const char *host_detect_local_cpu (int argc, const char **argv);
2810d565efSmrg
2910d565efSmrg #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
3010d565efSmrg #include "cpuid.h"
3110d565efSmrg
/* Parameters of one cache level as gathered by the CPUID-based
   detection routines below.  The member order matters: the routines
   in this file initialize objects of this type positionally with
   {0, 0, 0}.  */

struct cache_desc
{
  unsigned sizekb;  /* Cache size in kilobytes; 0 means "not detected".  */
  unsigned assoc;   /* Associativity (number of ways).  */
  unsigned line;    /* Cache line size.  */
};
3810d565efSmrg
3910d565efSmrg /* Returns command line parameters that describe size and
4010d565efSmrg cache line size of the processor caches. */
4110d565efSmrg
4210d565efSmrg static char *
describe_cache(struct cache_desc level1,struct cache_desc level2)4310d565efSmrg describe_cache (struct cache_desc level1, struct cache_desc level2)
4410d565efSmrg {
4510d565efSmrg char size[100], line[100], size2[100];
4610d565efSmrg
4710d565efSmrg /* At the moment, gcc does not use the information
4810d565efSmrg about the associativity of the cache. */
4910d565efSmrg
5010d565efSmrg snprintf (size, sizeof (size),
5110d565efSmrg "--param l1-cache-size=%u ", level1.sizekb);
5210d565efSmrg snprintf (line, sizeof (line),
5310d565efSmrg "--param l1-cache-line-size=%u ", level1.line);
5410d565efSmrg
5510d565efSmrg snprintf (size2, sizeof (size2),
5610d565efSmrg "--param l2-cache-size=%u ", level2.sizekb);
5710d565efSmrg
5810d565efSmrg return concat (size, line, size2, NULL);
5910d565efSmrg }
6010d565efSmrg
6110d565efSmrg /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
6210d565efSmrg
6310d565efSmrg static void
detect_l2_cache(struct cache_desc * level2)6410d565efSmrg detect_l2_cache (struct cache_desc *level2)
6510d565efSmrg {
6610d565efSmrg unsigned eax, ebx, ecx, edx;
6710d565efSmrg unsigned assoc;
6810d565efSmrg
6910d565efSmrg __cpuid (0x80000006, eax, ebx, ecx, edx);
7010d565efSmrg
7110d565efSmrg level2->sizekb = (ecx >> 16) & 0xffff;
7210d565efSmrg level2->line = ecx & 0xff;
7310d565efSmrg
7410d565efSmrg assoc = (ecx >> 12) & 0xf;
7510d565efSmrg if (assoc == 6)
7610d565efSmrg assoc = 8;
7710d565efSmrg else if (assoc == 8)
7810d565efSmrg assoc = 16;
7910d565efSmrg else if (assoc >= 0xa && assoc <= 0xc)
8010d565efSmrg assoc = 32 + (assoc - 0xa) * 16;
8110d565efSmrg else if (assoc >= 0xd && assoc <= 0xe)
8210d565efSmrg assoc = 96 + (assoc - 0xd) * 32;
8310d565efSmrg
8410d565efSmrg level2->assoc = assoc;
8510d565efSmrg }
8610d565efSmrg
8710d565efSmrg /* Returns the description of caches for an AMD processor. */
8810d565efSmrg
8910d565efSmrg static const char *
detect_caches_amd(unsigned max_ext_level)9010d565efSmrg detect_caches_amd (unsigned max_ext_level)
9110d565efSmrg {
9210d565efSmrg unsigned eax, ebx, ecx, edx;
9310d565efSmrg
9410d565efSmrg struct cache_desc level1, level2 = {0, 0, 0};
9510d565efSmrg
9610d565efSmrg if (max_ext_level < 0x80000005)
9710d565efSmrg return "";
9810d565efSmrg
9910d565efSmrg __cpuid (0x80000005, eax, ebx, ecx, edx);
10010d565efSmrg
10110d565efSmrg level1.sizekb = (ecx >> 24) & 0xff;
10210d565efSmrg level1.assoc = (ecx >> 16) & 0xff;
10310d565efSmrg level1.line = ecx & 0xff;
10410d565efSmrg
10510d565efSmrg if (max_ext_level >= 0x80000006)
10610d565efSmrg detect_l2_cache (&level2);
10710d565efSmrg
10810d565efSmrg return describe_cache (level1, level2);
10910d565efSmrg }
11010d565efSmrg
11110d565efSmrg /* Decodes the size, the associativity and the cache line size of
11210d565efSmrg L1/L2 caches of an Intel processor. Values are based on
11310d565efSmrg "Intel Processor Identification and the CPUID Instruction"
11410d565efSmrg [Application Note 485], revision -032, December 2007. */
11510d565efSmrg
11610d565efSmrg static void
decode_caches_intel(unsigned reg,bool xeon_mp,struct cache_desc * level1,struct cache_desc * level2)11710d565efSmrg decode_caches_intel (unsigned reg, bool xeon_mp,
11810d565efSmrg struct cache_desc *level1, struct cache_desc *level2)
11910d565efSmrg {
12010d565efSmrg int i;
12110d565efSmrg
12210d565efSmrg for (i = 24; i >= 0; i -= 8)
12310d565efSmrg switch ((reg >> i) & 0xff)
12410d565efSmrg {
12510d565efSmrg case 0x0a:
12610d565efSmrg level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
12710d565efSmrg break;
12810d565efSmrg case 0x0c:
12910d565efSmrg level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
13010d565efSmrg break;
13110d565efSmrg case 0x0d:
13210d565efSmrg level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
13310d565efSmrg break;
13410d565efSmrg case 0x0e:
13510d565efSmrg level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
13610d565efSmrg break;
13710d565efSmrg case 0x21:
13810d565efSmrg level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
13910d565efSmrg break;
14010d565efSmrg case 0x24:
14110d565efSmrg level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
14210d565efSmrg break;
14310d565efSmrg case 0x2c:
14410d565efSmrg level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
14510d565efSmrg break;
14610d565efSmrg case 0x39:
14710d565efSmrg level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
14810d565efSmrg break;
14910d565efSmrg case 0x3a:
15010d565efSmrg level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
15110d565efSmrg break;
15210d565efSmrg case 0x3b:
15310d565efSmrg level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
15410d565efSmrg break;
15510d565efSmrg case 0x3c:
15610d565efSmrg level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
15710d565efSmrg break;
15810d565efSmrg case 0x3d:
15910d565efSmrg level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
16010d565efSmrg break;
16110d565efSmrg case 0x3e:
16210d565efSmrg level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
16310d565efSmrg break;
16410d565efSmrg case 0x41:
16510d565efSmrg level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
16610d565efSmrg break;
16710d565efSmrg case 0x42:
16810d565efSmrg level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
16910d565efSmrg break;
17010d565efSmrg case 0x43:
17110d565efSmrg level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
17210d565efSmrg break;
17310d565efSmrg case 0x44:
17410d565efSmrg level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
17510d565efSmrg break;
17610d565efSmrg case 0x45:
17710d565efSmrg level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
17810d565efSmrg break;
17910d565efSmrg case 0x48:
18010d565efSmrg level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
18110d565efSmrg break;
18210d565efSmrg case 0x49:
18310d565efSmrg if (xeon_mp)
18410d565efSmrg break;
18510d565efSmrg level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
18610d565efSmrg break;
18710d565efSmrg case 0x4e:
18810d565efSmrg level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
18910d565efSmrg break;
19010d565efSmrg case 0x60:
19110d565efSmrg level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
19210d565efSmrg break;
19310d565efSmrg case 0x66:
19410d565efSmrg level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
19510d565efSmrg break;
19610d565efSmrg case 0x67:
19710d565efSmrg level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
19810d565efSmrg break;
19910d565efSmrg case 0x68:
20010d565efSmrg level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
20110d565efSmrg break;
20210d565efSmrg case 0x78:
20310d565efSmrg level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
20410d565efSmrg break;
20510d565efSmrg case 0x79:
20610d565efSmrg level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
20710d565efSmrg break;
20810d565efSmrg case 0x7a:
20910d565efSmrg level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
21010d565efSmrg break;
21110d565efSmrg case 0x7b:
21210d565efSmrg level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
21310d565efSmrg break;
21410d565efSmrg case 0x7c:
21510d565efSmrg level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
21610d565efSmrg break;
21710d565efSmrg case 0x7d:
21810d565efSmrg level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
21910d565efSmrg break;
22010d565efSmrg case 0x7f:
22110d565efSmrg level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
22210d565efSmrg break;
22310d565efSmrg case 0x80:
22410d565efSmrg level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
22510d565efSmrg break;
22610d565efSmrg case 0x82:
22710d565efSmrg level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
22810d565efSmrg break;
22910d565efSmrg case 0x83:
23010d565efSmrg level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
23110d565efSmrg break;
23210d565efSmrg case 0x84:
23310d565efSmrg level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
23410d565efSmrg break;
23510d565efSmrg case 0x85:
23610d565efSmrg level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
23710d565efSmrg break;
23810d565efSmrg case 0x86:
23910d565efSmrg level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
24010d565efSmrg break;
24110d565efSmrg case 0x87:
24210d565efSmrg level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
24310d565efSmrg
24410d565efSmrg default:
24510d565efSmrg break;
24610d565efSmrg }
24710d565efSmrg }
24810d565efSmrg
/* Detect cache parameters using CPUID function 2.

   The low four bits of EAX are taken as the number of times the
   function has to be executed and are cleared from the first EAX
   value before decoding.  On every round each of the four registers
   that is non-zero and has bit 31 clear is handed to
   decode_caches_intel, which updates *LEVEL1 and *LEVEL2; XEON_MP is
   passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  while (remaining-- > 0)
    {
      for (r = 0; r < 4; r++)
        {
          /* Skip registers that are empty or have bit 31 set.  */
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;

          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Fetch the next set of descriptors unless this was the last
         round.  */
      if (remaining > 0)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
27310d565efSmrg
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Cache type codes, compared against the low five bits of EAX
   returned by CPUID function 4.  */

enum cache_type
{
  CACHE_END = 0,      /* No further cache; ends the enumeration.  */
  CACHE_DATA = 1,     /* Data cache.  */
  CACHE_INST = 2,     /* Instruction cache.  */
  CACHE_UNIFIED = 3   /* Unified cache.  */
};
28410d565efSmrg
28510d565efSmrg static void
detect_caches_cpuid4(struct cache_desc * level1,struct cache_desc * level2,struct cache_desc * level3)28610d565efSmrg detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
28710d565efSmrg struct cache_desc *level3)
28810d565efSmrg {
28910d565efSmrg struct cache_desc *cache;
29010d565efSmrg
29110d565efSmrg unsigned eax, ebx, ecx, edx;
29210d565efSmrg int count;
29310d565efSmrg
29410d565efSmrg for (count = 0;; count++)
29510d565efSmrg {
29610d565efSmrg __cpuid_count(4, count, eax, ebx, ecx, edx);
29710d565efSmrg switch (eax & 0x1f)
29810d565efSmrg {
29910d565efSmrg case CACHE_END:
30010d565efSmrg return;
30110d565efSmrg case CACHE_DATA:
30210d565efSmrg case CACHE_UNIFIED:
30310d565efSmrg {
30410d565efSmrg switch ((eax >> 5) & 0x07)
30510d565efSmrg {
30610d565efSmrg case 1:
30710d565efSmrg cache = level1;
30810d565efSmrg break;
30910d565efSmrg case 2:
31010d565efSmrg cache = level2;
31110d565efSmrg break;
31210d565efSmrg case 3:
31310d565efSmrg cache = level3;
31410d565efSmrg break;
31510d565efSmrg default:
31610d565efSmrg cache = NULL;
31710d565efSmrg }
31810d565efSmrg
31910d565efSmrg if (cache)
32010d565efSmrg {
32110d565efSmrg unsigned sets = ecx + 1;
32210d565efSmrg unsigned part = ((ebx >> 12) & 0x03ff) + 1;
32310d565efSmrg
32410d565efSmrg cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
32510d565efSmrg cache->line = (ebx & 0x0fff) + 1;
32610d565efSmrg
32710d565efSmrg cache->sizekb = (cache->assoc * part
32810d565efSmrg * cache->line * sets) / 1024;
32910d565efSmrg }
33010d565efSmrg }
33110d565efSmrg default:
33210d565efSmrg break;
33310d565efSmrg }
33410d565efSmrg }
33510d565efSmrg }
33610d565efSmrg
33710d565efSmrg /* Returns the description of caches for an Intel processor. */
33810d565efSmrg
33910d565efSmrg static const char *
detect_caches_intel(bool xeon_mp,unsigned max_level,unsigned max_ext_level,unsigned * l2sizekb)34010d565efSmrg detect_caches_intel (bool xeon_mp, unsigned max_level,
34110d565efSmrg unsigned max_ext_level, unsigned *l2sizekb)
34210d565efSmrg {
34310d565efSmrg struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
34410d565efSmrg
34510d565efSmrg if (max_level >= 4)
34610d565efSmrg detect_caches_cpuid4 (&level1, &level2, &level3);
34710d565efSmrg else if (max_level >= 2)
34810d565efSmrg detect_caches_cpuid2 (xeon_mp, &level1, &level2);
34910d565efSmrg else
35010d565efSmrg return "";
35110d565efSmrg
35210d565efSmrg if (level1.sizekb == 0)
35310d565efSmrg return "";
35410d565efSmrg
35510d565efSmrg /* Let the L3 replace the L2. This assumes inclusive caches
35610d565efSmrg and single threaded program for now. */
35710d565efSmrg if (level3.sizekb)
35810d565efSmrg level2 = level3;
35910d565efSmrg
36010d565efSmrg /* Intel CPUs are equipped with AMD style L2 cache info. Try this
36110d565efSmrg method if other methods fail to provide L2 cache parameters. */
36210d565efSmrg if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
36310d565efSmrg detect_l2_cache (&level2);
36410d565efSmrg
36510d565efSmrg *l2sizekb = level2.sizekb;
36610d565efSmrg
36710d565efSmrg return describe_cache (level1, level2);
36810d565efSmrg }
36910d565efSmrg
/* This will be called by the spec parser in gcc.c when it sees
   a %:local_cpu_detect(args) construct.  Currently it will be called
   with either "arch" or "tune" as argument depending on if -march=native
   or -mtune=native is to be substituted.

   It returns a string containing new command line parameters to be
   put at the place of the above two options, depending on what CPU
   this is executed.  E.g. "-march=k8" on an AMD64 machine
   for -march=native.

   ARGC and ARGV are set depending on the actual arguments given
   in the spec.  */
38210d565efSmrg
host_detect_local_cpu(int argc,const char ** argv)38310d565efSmrg const char *host_detect_local_cpu (int argc, const char **argv)
38410d565efSmrg {
38510d565efSmrg enum processor_type processor = PROCESSOR_I386;
38610d565efSmrg const char *cpu = "i386";
38710d565efSmrg
38810d565efSmrg const char *cache = "";
38910d565efSmrg const char *options = "";
39010d565efSmrg
39110d565efSmrg unsigned int eax, ebx, ecx, edx;
39210d565efSmrg
39310d565efSmrg unsigned int max_level, ext_level;
39410d565efSmrg
39510d565efSmrg unsigned int vendor;
39610d565efSmrg unsigned int model, family;
39710d565efSmrg
39810d565efSmrg unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
39910d565efSmrg unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
40010d565efSmrg
40110d565efSmrg /* Extended features */
40210d565efSmrg unsigned int has_lahf_lm = 0, has_sse4a = 0;
40310d565efSmrg unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
40410d565efSmrg unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
40510d565efSmrg unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
40610d565efSmrg unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
40710d565efSmrg unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
40810d565efSmrg unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
40910d565efSmrg unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410c7a68eb7Smrg unsigned int has_pconfig = 0, has_wbnoinvd = 0;
41110d565efSmrg unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
41210d565efSmrg unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
41310d565efSmrg unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
41410d565efSmrg unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
41510d565efSmrg unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
41610d565efSmrg unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
41710d565efSmrg unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
41810d565efSmrg unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
41910d565efSmrg unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
42010d565efSmrg unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421c7a68eb7Smrg unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422c7a68eb7Smrg unsigned int has_avx512bitalg = 0;
423*ec02198aSmrg unsigned int has_avx512vpopcntdq = 0;
424c7a68eb7Smrg unsigned int has_shstk = 0;
425c7a68eb7Smrg unsigned int has_avx512vnni = 0, has_vaes = 0;
426c7a68eb7Smrg unsigned int has_vpclmulqdq = 0;
427*ec02198aSmrg unsigned int has_avx512vp2intersect = 0;
428c7a68eb7Smrg unsigned int has_movdiri = 0, has_movdir64b = 0;
429*ec02198aSmrg unsigned int has_enqcmd = 0;
4300fc04c29Smrg unsigned int has_waitpkg = 0;
4310fc04c29Smrg unsigned int has_cldemote = 0;
432*ec02198aSmrg unsigned int has_avx512bf16 = 0;
4330fc04c29Smrg
4340fc04c29Smrg unsigned int has_ptwrite = 0;
43510d565efSmrg
43610d565efSmrg bool arch;
43710d565efSmrg
43810d565efSmrg unsigned int l2sizekb = 0;
43910d565efSmrg
44010d565efSmrg if (argc < 1)
44110d565efSmrg return NULL;
44210d565efSmrg
44310d565efSmrg arch = !strcmp (argv[0], "arch");
44410d565efSmrg
44510d565efSmrg if (!arch && strcmp (argv[0], "tune"))
44610d565efSmrg return NULL;
44710d565efSmrg
44810d565efSmrg max_level = __get_cpuid_max (0, &vendor);
44910d565efSmrg if (max_level < 1)
45010d565efSmrg goto done;
45110d565efSmrg
45210d565efSmrg __cpuid (1, eax, ebx, ecx, edx);
45310d565efSmrg
45410d565efSmrg model = (eax >> 4) & 0x0f;
45510d565efSmrg family = (eax >> 8) & 0x0f;
45610d565efSmrg if (vendor == signature_INTEL_ebx
45710d565efSmrg || vendor == signature_AMD_ebx)
45810d565efSmrg {
45910d565efSmrg unsigned int extended_model, extended_family;
46010d565efSmrg
46110d565efSmrg extended_model = (eax >> 12) & 0xf0;
46210d565efSmrg extended_family = (eax >> 20) & 0xff;
46310d565efSmrg if (family == 0x0f)
46410d565efSmrg {
46510d565efSmrg family += extended_family;
46610d565efSmrg model += extended_model;
46710d565efSmrg }
46810d565efSmrg else if (family == 0x06)
46910d565efSmrg model += extended_model;
47010d565efSmrg }
47110d565efSmrg
47210d565efSmrg has_sse3 = ecx & bit_SSE3;
47310d565efSmrg has_ssse3 = ecx & bit_SSSE3;
47410d565efSmrg has_sse4_1 = ecx & bit_SSE4_1;
47510d565efSmrg has_sse4_2 = ecx & bit_SSE4_2;
47610d565efSmrg has_avx = ecx & bit_AVX;
47710d565efSmrg has_osxsave = ecx & bit_OSXSAVE;
47810d565efSmrg has_cmpxchg16b = ecx & bit_CMPXCHG16B;
47910d565efSmrg has_movbe = ecx & bit_MOVBE;
48010d565efSmrg has_popcnt = ecx & bit_POPCNT;
48110d565efSmrg has_aes = ecx & bit_AES;
48210d565efSmrg has_pclmul = ecx & bit_PCLMUL;
48310d565efSmrg has_fma = ecx & bit_FMA;
48410d565efSmrg has_f16c = ecx & bit_F16C;
48510d565efSmrg has_rdrnd = ecx & bit_RDRND;
48610d565efSmrg has_xsave = ecx & bit_XSAVE;
48710d565efSmrg
48810d565efSmrg has_cmpxchg8b = edx & bit_CMPXCHG8B;
48910d565efSmrg has_cmov = edx & bit_CMOV;
49010d565efSmrg has_mmx = edx & bit_MMX;
49110d565efSmrg has_fxsr = edx & bit_FXSAVE;
49210d565efSmrg has_sse = edx & bit_SSE;
49310d565efSmrg has_sse2 = edx & bit_SSE2;
49410d565efSmrg
49510d565efSmrg if (max_level >= 7)
49610d565efSmrg {
49710d565efSmrg __cpuid_count (7, 0, eax, ebx, ecx, edx);
49810d565efSmrg
49910d565efSmrg has_bmi = ebx & bit_BMI;
50010d565efSmrg has_sgx = ebx & bit_SGX;
50110d565efSmrg has_hle = ebx & bit_HLE;
50210d565efSmrg has_rtm = ebx & bit_RTM;
50310d565efSmrg has_avx2 = ebx & bit_AVX2;
50410d565efSmrg has_bmi2 = ebx & bit_BMI2;
50510d565efSmrg has_fsgsbase = ebx & bit_FSGSBASE;
50610d565efSmrg has_rdseed = ebx & bit_RDSEED;
50710d565efSmrg has_adx = ebx & bit_ADX;
50810d565efSmrg has_avx512f = ebx & bit_AVX512F;
50910d565efSmrg has_avx512er = ebx & bit_AVX512ER;
51010d565efSmrg has_avx512pf = ebx & bit_AVX512PF;
51110d565efSmrg has_avx512cd = ebx & bit_AVX512CD;
51210d565efSmrg has_sha = ebx & bit_SHA;
51310d565efSmrg has_clflushopt = ebx & bit_CLFLUSHOPT;
51410d565efSmrg has_clwb = ebx & bit_CLWB;
51510d565efSmrg has_avx512dq = ebx & bit_AVX512DQ;
51610d565efSmrg has_avx512bw = ebx & bit_AVX512BW;
51710d565efSmrg has_avx512vl = ebx & bit_AVX512VL;
51810d565efSmrg has_avx512ifma = ebx & bit_AVX512IFMA;
51910d565efSmrg
52010d565efSmrg has_prefetchwt1 = ecx & bit_PREFETCHWT1;
52110d565efSmrg has_avx512vbmi = ecx & bit_AVX512VBMI;
52210d565efSmrg has_pku = ecx & bit_OSPKE;
523c7a68eb7Smrg has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
524c7a68eb7Smrg has_avx512vnni = ecx & bit_AVX512VNNI;
52510d565efSmrg has_rdpid = ecx & bit_RDPID;
526c7a68eb7Smrg has_gfni = ecx & bit_GFNI;
527c7a68eb7Smrg has_vaes = ecx & bit_VAES;
528c7a68eb7Smrg has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
529c7a68eb7Smrg has_avx512bitalg = ecx & bit_AVX512BITALG;
530*ec02198aSmrg has_avx512vpopcntdq = ecx & bit_AVX512VPOPCNTDQ;
531c7a68eb7Smrg has_movdiri = ecx & bit_MOVDIRI;
532c7a68eb7Smrg has_movdir64b = ecx & bit_MOVDIR64B;
533*ec02198aSmrg has_enqcmd = ecx & bit_ENQCMD;
5340fc04c29Smrg has_cldemote = ecx & bit_CLDEMOTE;
53510d565efSmrg
53610d565efSmrg has_avx5124vnniw = edx & bit_AVX5124VNNIW;
53710d565efSmrg has_avx5124fmaps = edx & bit_AVX5124FMAPS;
538*ec02198aSmrg has_avx512vp2intersect = edx & bit_AVX512VP2INTERSECT;
539c7a68eb7Smrg
540c7a68eb7Smrg has_shstk = ecx & bit_SHSTK;
541c7a68eb7Smrg has_pconfig = edx & bit_PCONFIG;
5420fc04c29Smrg has_waitpkg = ecx & bit_WAITPKG;
543*ec02198aSmrg
544*ec02198aSmrg __cpuid_count (7, 1, eax, ebx, ecx, edx);
545*ec02198aSmrg has_avx512bf16 = eax & bit_AVX512BF16;
54610d565efSmrg }
54710d565efSmrg
54810d565efSmrg if (max_level >= 13)
54910d565efSmrg {
55010d565efSmrg __cpuid_count (13, 1, eax, ebx, ecx, edx);
55110d565efSmrg
55210d565efSmrg has_xsaveopt = eax & bit_XSAVEOPT;
55310d565efSmrg has_xsavec = eax & bit_XSAVEC;
55410d565efSmrg has_xsaves = eax & bit_XSAVES;
55510d565efSmrg }
55610d565efSmrg
5570fc04c29Smrg if (max_level >= 0x14)
5580fc04c29Smrg {
5590fc04c29Smrg __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
5600fc04c29Smrg
5610fc04c29Smrg has_ptwrite = ebx & bit_PTWRITE;
5620fc04c29Smrg }
5630fc04c29Smrg
56410d565efSmrg /* Check cpuid level of extended features. */
56510d565efSmrg __cpuid (0x80000000, ext_level, ebx, ecx, edx);
56610d565efSmrg
56710d565efSmrg if (ext_level >= 0x80000001)
56810d565efSmrg {
56910d565efSmrg __cpuid (0x80000001, eax, ebx, ecx, edx);
57010d565efSmrg
57110d565efSmrg has_lahf_lm = ecx & bit_LAHF_LM;
57210d565efSmrg has_sse4a = ecx & bit_SSE4a;
57310d565efSmrg has_abm = ecx & bit_ABM;
57410d565efSmrg has_lwp = ecx & bit_LWP;
57510d565efSmrg has_fma4 = ecx & bit_FMA4;
57610d565efSmrg has_xop = ecx & bit_XOP;
57710d565efSmrg has_tbm = ecx & bit_TBM;
57810d565efSmrg has_lzcnt = ecx & bit_LZCNT;
57910d565efSmrg has_prfchw = ecx & bit_PRFCHW;
58010d565efSmrg
58110d565efSmrg has_longmode = edx & bit_LM;
58210d565efSmrg has_3dnowp = edx & bit_3DNOWP;
58310d565efSmrg has_3dnow = edx & bit_3DNOW;
58410d565efSmrg has_mwaitx = ecx & bit_MWAITX;
58510d565efSmrg }
58610d565efSmrg
58710d565efSmrg if (ext_level >= 0x80000008)
58810d565efSmrg {
58910d565efSmrg __cpuid (0x80000008, eax, ebx, ecx, edx);
59010d565efSmrg has_clzero = ebx & bit_CLZERO;
591c7a68eb7Smrg has_wbnoinvd = ebx & bit_WBNOINVD;
59210d565efSmrg }
59310d565efSmrg
59410d565efSmrg /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
59510d565efSmrg #define XCR_XFEATURE_ENABLED_MASK 0x0
59610d565efSmrg #define XSTATE_FP 0x1
59710d565efSmrg #define XSTATE_SSE 0x2
59810d565efSmrg #define XSTATE_YMM 0x4
59910d565efSmrg #define XSTATE_OPMASK 0x20
60010d565efSmrg #define XSTATE_ZMM 0x40
60110d565efSmrg #define XSTATE_HI_ZMM 0x80
60210d565efSmrg
60310d565efSmrg #define XCR_AVX_ENABLED_MASK \
60410d565efSmrg (XSTATE_SSE | XSTATE_YMM)
60510d565efSmrg #define XCR_AVX512F_ENABLED_MASK \
60610d565efSmrg (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
60710d565efSmrg
60810d565efSmrg if (has_osxsave)
60910d565efSmrg asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
61010d565efSmrg : "=a" (eax), "=d" (edx)
61110d565efSmrg : "c" (XCR_XFEATURE_ENABLED_MASK));
61210d565efSmrg else
61310d565efSmrg eax = 0;
61410d565efSmrg
61510d565efSmrg /* Check if AVX registers are supported. */
61610d565efSmrg if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
61710d565efSmrg {
61810d565efSmrg has_avx = 0;
61910d565efSmrg has_avx2 = 0;
62010d565efSmrg has_fma = 0;
62110d565efSmrg has_fma4 = 0;
62210d565efSmrg has_f16c = 0;
62310d565efSmrg has_xop = 0;
62410d565efSmrg has_xsave = 0;
62510d565efSmrg has_xsaveopt = 0;
62610d565efSmrg has_xsaves = 0;
62710d565efSmrg has_xsavec = 0;
62810d565efSmrg }
62910d565efSmrg
63010d565efSmrg /* Check if AVX512F registers are supported. */
63110d565efSmrg if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
63210d565efSmrg {
63310d565efSmrg has_avx512f = 0;
63410d565efSmrg has_avx512er = 0;
63510d565efSmrg has_avx512pf = 0;
63610d565efSmrg has_avx512cd = 0;
63710d565efSmrg has_avx512dq = 0;
63810d565efSmrg has_avx512bw = 0;
63910d565efSmrg has_avx512vl = 0;
64010d565efSmrg }
64110d565efSmrg
64210d565efSmrg if (!arch)
64310d565efSmrg {
64410d565efSmrg if (vendor == signature_AMD_ebx
64510d565efSmrg || vendor == signature_CENTAUR_ebx
64610d565efSmrg || vendor == signature_CYRIX_ebx
64710d565efSmrg || vendor == signature_NSC_ebx)
64810d565efSmrg cache = detect_caches_amd (ext_level);
64910d565efSmrg else if (vendor == signature_INTEL_ebx)
65010d565efSmrg {
65110d565efSmrg bool xeon_mp = (family == 15 && model == 6);
65210d565efSmrg cache = detect_caches_intel (xeon_mp, max_level,
65310d565efSmrg ext_level, &l2sizekb);
65410d565efSmrg }
65510d565efSmrg }
65610d565efSmrg
65710d565efSmrg if (vendor == signature_AMD_ebx)
65810d565efSmrg {
65910d565efSmrg unsigned int name;
66010d565efSmrg
66110d565efSmrg /* Detect geode processor by its processor signature. */
66210d565efSmrg if (ext_level >= 0x80000002)
66310d565efSmrg __cpuid (0x80000002, name, ebx, ecx, edx);
66410d565efSmrg else
66510d565efSmrg name = 0;
66610d565efSmrg
66710d565efSmrg if (name == signature_NSC_ebx)
66810d565efSmrg processor = PROCESSOR_GEODE;
66910d565efSmrg else if (has_movbe && family == 22)
67010d565efSmrg processor = PROCESSOR_BTVER2;
671*ec02198aSmrg else if (has_vaes)
672*ec02198aSmrg processor = PROCESSOR_ZNVER3;
6730fc04c29Smrg else if (has_clwb)
6740fc04c29Smrg processor = PROCESSOR_ZNVER2;
67510d565efSmrg else if (has_clzero)
67610d565efSmrg processor = PROCESSOR_ZNVER1;
67710d565efSmrg else if (has_avx2)
67810d565efSmrg processor = PROCESSOR_BDVER4;
67910d565efSmrg else if (has_xsaveopt)
68010d565efSmrg processor = PROCESSOR_BDVER3;
68110d565efSmrg else if (has_bmi)
68210d565efSmrg processor = PROCESSOR_BDVER2;
68310d565efSmrg else if (has_xop)
68410d565efSmrg processor = PROCESSOR_BDVER1;
68510d565efSmrg else if (has_sse4a && has_ssse3)
68610d565efSmrg processor = PROCESSOR_BTVER1;
68710d565efSmrg else if (has_sse4a)
68810d565efSmrg processor = PROCESSOR_AMDFAM10;
68910d565efSmrg else if (has_sse2 || has_longmode)
69010d565efSmrg processor = PROCESSOR_K8;
69110d565efSmrg else if (has_3dnowp && family == 6)
69210d565efSmrg processor = PROCESSOR_ATHLON;
69310d565efSmrg else if (has_mmx)
69410d565efSmrg processor = PROCESSOR_K6;
69510d565efSmrg else
69610d565efSmrg processor = PROCESSOR_PENTIUM;
69710d565efSmrg }
69810d565efSmrg else if (vendor == signature_CENTAUR_ebx)
69910d565efSmrg {
70010d565efSmrg processor = PROCESSOR_GENERIC;
70110d565efSmrg
70210d565efSmrg switch (family)
70310d565efSmrg {
70410d565efSmrg default:
70510d565efSmrg /* We have no idea. */
70610d565efSmrg break;
70710d565efSmrg
70810d565efSmrg case 5:
70910d565efSmrg if (has_3dnow || has_mmx)
71010d565efSmrg processor = PROCESSOR_I486;
71110d565efSmrg break;
71210d565efSmrg
71310d565efSmrg case 6:
71410d565efSmrg if (has_longmode)
71510d565efSmrg processor = PROCESSOR_K8;
71610d565efSmrg else if (model >= 9)
71710d565efSmrg processor = PROCESSOR_PENTIUMPRO;
71810d565efSmrg else if (model >= 6)
71910d565efSmrg processor = PROCESSOR_I486;
72010d565efSmrg }
72110d565efSmrg }
72210d565efSmrg else
72310d565efSmrg {
72410d565efSmrg switch (family)
72510d565efSmrg {
72610d565efSmrg case 4:
72710d565efSmrg processor = PROCESSOR_I486;
72810d565efSmrg break;
72910d565efSmrg case 5:
73010d565efSmrg processor = PROCESSOR_PENTIUM;
73110d565efSmrg break;
73210d565efSmrg case 6:
73310d565efSmrg processor = PROCESSOR_PENTIUMPRO;
73410d565efSmrg break;
73510d565efSmrg case 15:
73610d565efSmrg processor = PROCESSOR_PENTIUM4;
73710d565efSmrg break;
73810d565efSmrg default:
73910d565efSmrg /* We have no idea. */
74010d565efSmrg processor = PROCESSOR_GENERIC;
74110d565efSmrg }
74210d565efSmrg }
74310d565efSmrg
74410d565efSmrg switch (processor)
74510d565efSmrg {
74610d565efSmrg case PROCESSOR_I386:
74710d565efSmrg /* Default. */
74810d565efSmrg break;
74910d565efSmrg case PROCESSOR_I486:
75010d565efSmrg if (arch && vendor == signature_CENTAUR_ebx)
75110d565efSmrg {
75210d565efSmrg if (model >= 6)
75310d565efSmrg cpu = "c3";
75410d565efSmrg else if (has_3dnow)
75510d565efSmrg cpu = "winchip2";
75610d565efSmrg else
75710d565efSmrg /* Assume WinChip C6. */
75810d565efSmrg cpu = "winchip-c6";
75910d565efSmrg }
76010d565efSmrg else
76110d565efSmrg cpu = "i486";
76210d565efSmrg break;
76310d565efSmrg case PROCESSOR_PENTIUM:
76410d565efSmrg if (arch && has_mmx)
76510d565efSmrg cpu = "pentium-mmx";
76610d565efSmrg else
76710d565efSmrg cpu = "pentium";
76810d565efSmrg break;
76910d565efSmrg case PROCESSOR_PENTIUMPRO:
77010d565efSmrg switch (model)
77110d565efSmrg {
77210d565efSmrg case 0x1c:
77310d565efSmrg case 0x26:
77410d565efSmrg /* Bonnell. */
77510d565efSmrg cpu = "bonnell";
77610d565efSmrg break;
77710d565efSmrg case 0x37:
77810d565efSmrg case 0x4a:
77910d565efSmrg case 0x4d:
78010d565efSmrg case 0x5d:
78110d565efSmrg /* Silvermont. */
782*ec02198aSmrg case 0x4c:
783*ec02198aSmrg case 0x5a:
784*ec02198aSmrg case 0x75:
785*ec02198aSmrg /* Airmont. */
78610d565efSmrg cpu = "silvermont";
78710d565efSmrg break;
7880fc04c29Smrg case 0x5c:
7890fc04c29Smrg case 0x5f:
7900fc04c29Smrg /* Goldmont. */
7910fc04c29Smrg cpu = "goldmont";
7920fc04c29Smrg break;
7930fc04c29Smrg case 0x7a:
7940fc04c29Smrg /* Goldmont Plus. */
7950fc04c29Smrg cpu = "goldmont-plus";
7960fc04c29Smrg break;
797*ec02198aSmrg case 0x86:
798*ec02198aSmrg case 0x96:
799*ec02198aSmrg case 0x9c:
800*ec02198aSmrg /* Tremont. */
801*ec02198aSmrg cpu = "tremont";
802*ec02198aSmrg break;
80310d565efSmrg case 0x0f:
80410d565efSmrg /* Merom. */
80510d565efSmrg case 0x17:
80610d565efSmrg case 0x1d:
80710d565efSmrg /* Penryn. */
80810d565efSmrg cpu = "core2";
80910d565efSmrg break;
81010d565efSmrg case 0x1a:
81110d565efSmrg case 0x1e:
81210d565efSmrg case 0x1f:
81310d565efSmrg case 0x2e:
81410d565efSmrg /* Nehalem. */
81510d565efSmrg cpu = "nehalem";
81610d565efSmrg break;
81710d565efSmrg case 0x25:
81810d565efSmrg case 0x2c:
81910d565efSmrg case 0x2f:
82010d565efSmrg /* Westmere. */
82110d565efSmrg cpu = "westmere";
82210d565efSmrg break;
82310d565efSmrg case 0x2a:
82410d565efSmrg case 0x2d:
82510d565efSmrg /* Sandy Bridge. */
82610d565efSmrg cpu = "sandybridge";
82710d565efSmrg break;
82810d565efSmrg case 0x3a:
82910d565efSmrg case 0x3e:
83010d565efSmrg /* Ivy Bridge. */
83110d565efSmrg cpu = "ivybridge";
83210d565efSmrg break;
83310d565efSmrg case 0x3c:
83410d565efSmrg case 0x3f:
83510d565efSmrg case 0x45:
83610d565efSmrg case 0x46:
83710d565efSmrg /* Haswell. */
83810d565efSmrg cpu = "haswell";
83910d565efSmrg break;
84010d565efSmrg case 0x3d:
84110d565efSmrg case 0x47:
84210d565efSmrg case 0x4f:
84310d565efSmrg case 0x56:
84410d565efSmrg /* Broadwell. */
84510d565efSmrg cpu = "broadwell";
84610d565efSmrg break;
84710d565efSmrg case 0x4e:
84810d565efSmrg case 0x5e:
84910d565efSmrg /* Skylake. */
850c7a68eb7Smrg case 0x8e:
851c7a68eb7Smrg case 0x9e:
852c7a68eb7Smrg /* Kaby Lake. */
853*ec02198aSmrg case 0xa5:
854*ec02198aSmrg case 0xa6:
855*ec02198aSmrg /* Comet Lake. */
85610d565efSmrg cpu = "skylake";
85710d565efSmrg break;
858c7a68eb7Smrg case 0x55:
8590fc04c29Smrg if (has_avx512vnni)
8600fc04c29Smrg /* Cascade Lake. */
8610fc04c29Smrg cpu = "cascadelake";
8620fc04c29Smrg else
863c7a68eb7Smrg /* Skylake with AVX-512. */
864c7a68eb7Smrg cpu = "skylake-avx512";
865c7a68eb7Smrg break;
866*ec02198aSmrg case 0x6a:
867*ec02198aSmrg case 0x6c:
868*ec02198aSmrg /* Ice Lake server. */
869*ec02198aSmrg cpu = "icelake-server";
870*ec02198aSmrg break;
871*ec02198aSmrg case 0x7e:
872*ec02198aSmrg case 0x7d:
873*ec02198aSmrg case 0x9d:
874*ec02198aSmrg /* Ice Lake client. */
875*ec02198aSmrg cpu = "icelake-client";
876*ec02198aSmrg break;
877*ec02198aSmrg case 0x8c:
878*ec02198aSmrg case 0x8d:
879*ec02198aSmrg /* Tiger Lake. */
880*ec02198aSmrg cpu = "tigerlake";
881*ec02198aSmrg break;
88210d565efSmrg case 0x57:
88310d565efSmrg /* Knights Landing. */
88410d565efSmrg cpu = "knl";
88510d565efSmrg break;
886c7a68eb7Smrg case 0x66:
887c7a68eb7Smrg /* Cannon Lake. */
888c7a68eb7Smrg cpu = "cannonlake";
889c7a68eb7Smrg break;
890c7a68eb7Smrg case 0x85:
891c7a68eb7Smrg /* Knights Mill. */
892c7a68eb7Smrg cpu = "knm";
893c7a68eb7Smrg break;
89410d565efSmrg default:
89510d565efSmrg if (arch)
89610d565efSmrg {
89710d565efSmrg /* This is unknown family 0x6 CPU. */
898*ec02198aSmrg if (has_avx)
899*ec02198aSmrg {
900*ec02198aSmrg /* Assume Tiger Lake */
901*ec02198aSmrg if (has_avx512vp2intersect)
902*ec02198aSmrg cpu = "tigerlake";
903*ec02198aSmrg /* Assume Cooper Lake */
904*ec02198aSmrg else if (has_avx512bf16)
905*ec02198aSmrg cpu = "cooperlake";
906c7a68eb7Smrg /* Assume Ice Lake Server. */
907*ec02198aSmrg else if (has_wbnoinvd)
908c7a68eb7Smrg cpu = "icelake-server";
909c7a68eb7Smrg /* Assume Ice Lake. */
910*ec02198aSmrg else if (has_avx512bitalg)
911c7a68eb7Smrg cpu = "icelake-client";
912c7a68eb7Smrg /* Assume Cannon Lake. */
913c7a68eb7Smrg else if (has_avx512vbmi)
914c7a68eb7Smrg cpu = "cannonlake";
915c7a68eb7Smrg /* Assume Knights Mill. */
916c7a68eb7Smrg else if (has_avx5124vnniw)
917c7a68eb7Smrg cpu = "knm";
91810d565efSmrg /* Assume Knights Landing. */
919c7a68eb7Smrg else if (has_avx512er)
92010d565efSmrg cpu = "knl";
921c7a68eb7Smrg /* Assume Skylake with AVX-512. */
922c7a68eb7Smrg else if (has_avx512f)
923c7a68eb7Smrg cpu = "skylake-avx512";
924c7a68eb7Smrg /* Assume Skylake. */
925c7a68eb7Smrg else if (has_clflushopt)
926c7a68eb7Smrg cpu = "skylake";
92710d565efSmrg /* Assume Broadwell. */
92810d565efSmrg else if (has_adx)
92910d565efSmrg cpu = "broadwell";
93010d565efSmrg else if (has_avx2)
93110d565efSmrg /* Assume Haswell. */
93210d565efSmrg cpu = "haswell";
933*ec02198aSmrg else
93410d565efSmrg /* Assume Sandy Bridge. */
93510d565efSmrg cpu = "sandybridge";
936*ec02198aSmrg }
93710d565efSmrg else if (has_sse4_2)
93810d565efSmrg {
9390fc04c29Smrg if (has_gfni)
9400fc04c29Smrg /* Assume Tremont. */
9410fc04c29Smrg cpu = "tremont";
9420fc04c29Smrg else if (has_sgx)
9430fc04c29Smrg /* Assume Goldmont Plus. */
9440fc04c29Smrg cpu = "goldmont-plus";
9450fc04c29Smrg else if (has_xsave)
9460fc04c29Smrg /* Assume Goldmont. */
9470fc04c29Smrg cpu = "goldmont";
9480fc04c29Smrg else if (has_movbe)
94910d565efSmrg /* Assume Silvermont. */
95010d565efSmrg cpu = "silvermont";
95110d565efSmrg else
95210d565efSmrg /* Assume Nehalem. */
95310d565efSmrg cpu = "nehalem";
95410d565efSmrg }
95510d565efSmrg else if (has_ssse3)
95610d565efSmrg {
95710d565efSmrg if (has_movbe)
95810d565efSmrg /* Assume Bonnell. */
95910d565efSmrg cpu = "bonnell";
96010d565efSmrg else
96110d565efSmrg /* Assume Core 2. */
96210d565efSmrg cpu = "core2";
96310d565efSmrg }
96410d565efSmrg else if (has_longmode)
96510d565efSmrg /* Perhaps some emulator? Assume x86-64, otherwise gcc
96610d565efSmrg -march=native would be unusable for 64-bit compilations,
96710d565efSmrg as all the CPUs below are 32-bit only. */
96810d565efSmrg cpu = "x86-64";
96910d565efSmrg else if (has_sse3)
97010d565efSmrg {
97110d565efSmrg if (vendor == signature_CENTAUR_ebx)
97210d565efSmrg /* C7 / Eden "Esther" */
97310d565efSmrg cpu = "c7";
97410d565efSmrg else
97510d565efSmrg /* It is Core Duo. */
97610d565efSmrg cpu = "pentium-m";
97710d565efSmrg }
97810d565efSmrg else if (has_sse2)
97910d565efSmrg /* It is Pentium M. */
98010d565efSmrg cpu = "pentium-m";
98110d565efSmrg else if (has_sse)
98210d565efSmrg {
98310d565efSmrg if (vendor == signature_CENTAUR_ebx)
98410d565efSmrg {
98510d565efSmrg if (model >= 9)
98610d565efSmrg /* Eden "Nehemiah" */
98710d565efSmrg cpu = "nehemiah";
98810d565efSmrg else
98910d565efSmrg cpu = "c3-2";
99010d565efSmrg }
99110d565efSmrg else
99210d565efSmrg /* It is Pentium III. */
99310d565efSmrg cpu = "pentium3";
99410d565efSmrg }
99510d565efSmrg else if (has_mmx)
99610d565efSmrg /* It is Pentium II. */
99710d565efSmrg cpu = "pentium2";
99810d565efSmrg else
99910d565efSmrg /* Default to Pentium Pro. */
100010d565efSmrg cpu = "pentiumpro";
100110d565efSmrg }
100210d565efSmrg else
100310d565efSmrg /* For -mtune, we default to -mtune=generic. */
100410d565efSmrg cpu = "generic";
100510d565efSmrg break;
100610d565efSmrg }
100710d565efSmrg break;
100810d565efSmrg case PROCESSOR_PENTIUM4:
100910d565efSmrg if (has_sse3)
101010d565efSmrg {
101110d565efSmrg if (has_longmode)
101210d565efSmrg cpu = "nocona";
101310d565efSmrg else
101410d565efSmrg cpu = "prescott";
101510d565efSmrg }
101610d565efSmrg else
101710d565efSmrg cpu = "pentium4";
101810d565efSmrg break;
101910d565efSmrg case PROCESSOR_GEODE:
102010d565efSmrg cpu = "geode";
102110d565efSmrg break;
102210d565efSmrg case PROCESSOR_K6:
102310d565efSmrg if (arch && has_3dnow)
102410d565efSmrg cpu = "k6-3";
102510d565efSmrg else
102610d565efSmrg cpu = "k6";
102710d565efSmrg break;
102810d565efSmrg case PROCESSOR_ATHLON:
102910d565efSmrg if (arch && has_sse)
103010d565efSmrg cpu = "athlon-4";
103110d565efSmrg else
103210d565efSmrg cpu = "athlon";
103310d565efSmrg break;
103410d565efSmrg case PROCESSOR_K8:
103510d565efSmrg if (arch)
103610d565efSmrg {
103710d565efSmrg if (vendor == signature_CENTAUR_ebx)
103810d565efSmrg {
103910d565efSmrg if (has_sse4_1)
104010d565efSmrg /* Nano 3000 | Nano dual / quad core | Eden X4 */
104110d565efSmrg cpu = "nano-3000";
104210d565efSmrg else if (has_ssse3)
104310d565efSmrg /* Nano 1000 | Nano 2000 */
104410d565efSmrg cpu = "nano";
104510d565efSmrg else if (has_sse3)
104610d565efSmrg /* Eden X2 */
104710d565efSmrg cpu = "eden-x2";
104810d565efSmrg else
104910d565efSmrg /* Default to k8 */
105010d565efSmrg cpu = "k8";
105110d565efSmrg }
105210d565efSmrg else if (has_sse3)
105310d565efSmrg cpu = "k8-sse3";
105410d565efSmrg else
105510d565efSmrg cpu = "k8";
105610d565efSmrg }
105710d565efSmrg else
105810d565efSmrg /* For -mtune, we default to -mtune=k8 */
105910d565efSmrg cpu = "k8";
106010d565efSmrg break;
106110d565efSmrg case PROCESSOR_AMDFAM10:
106210d565efSmrg cpu = "amdfam10";
106310d565efSmrg break;
106410d565efSmrg case PROCESSOR_BDVER1:
106510d565efSmrg cpu = "bdver1";
106610d565efSmrg break;
106710d565efSmrg case PROCESSOR_BDVER2:
106810d565efSmrg cpu = "bdver2";
106910d565efSmrg break;
107010d565efSmrg case PROCESSOR_BDVER3:
107110d565efSmrg cpu = "bdver3";
107210d565efSmrg break;
107310d565efSmrg case PROCESSOR_BDVER4:
107410d565efSmrg cpu = "bdver4";
107510d565efSmrg break;
107610d565efSmrg case PROCESSOR_ZNVER1:
107710d565efSmrg cpu = "znver1";
107810d565efSmrg break;
10790fc04c29Smrg case PROCESSOR_ZNVER2:
10800fc04c29Smrg cpu = "znver2";
10810fc04c29Smrg break;
1082*ec02198aSmrg case PROCESSOR_ZNVER3:
1083*ec02198aSmrg cpu = "znver3";
1084*ec02198aSmrg break;
108510d565efSmrg case PROCESSOR_BTVER1:
108610d565efSmrg cpu = "btver1";
108710d565efSmrg break;
108810d565efSmrg case PROCESSOR_BTVER2:
108910d565efSmrg cpu = "btver2";
109010d565efSmrg break;
109110d565efSmrg
109210d565efSmrg default:
109310d565efSmrg /* Use something reasonable. */
109410d565efSmrg if (arch)
109510d565efSmrg {
109610d565efSmrg if (has_ssse3)
109710d565efSmrg cpu = "core2";
109810d565efSmrg else if (has_sse3)
109910d565efSmrg {
110010d565efSmrg if (has_longmode)
110110d565efSmrg cpu = "nocona";
110210d565efSmrg else
110310d565efSmrg cpu = "prescott";
110410d565efSmrg }
110510d565efSmrg else if (has_longmode)
110610d565efSmrg /* Perhaps some emulator? Assume x86-64, otherwise gcc
110710d565efSmrg -march=native would be unusable for 64-bit compilations,
110810d565efSmrg as all the CPUs below are 32-bit only. */
110910d565efSmrg cpu = "x86-64";
111010d565efSmrg else if (has_sse2)
111110d565efSmrg cpu = "pentium4";
111210d565efSmrg else if (has_cmov)
111310d565efSmrg cpu = "pentiumpro";
111410d565efSmrg else if (has_mmx)
111510d565efSmrg cpu = "pentium-mmx";
111610d565efSmrg else if (has_cmpxchg8b)
111710d565efSmrg cpu = "pentium";
111810d565efSmrg }
111910d565efSmrg else
112010d565efSmrg cpu = "generic";
112110d565efSmrg }
112210d565efSmrg
112310d565efSmrg if (arch)
112410d565efSmrg {
112510d565efSmrg const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
112610d565efSmrg const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
112710d565efSmrg const char *sse = has_sse ? " -msse" : " -mno-sse";
112810d565efSmrg const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
112910d565efSmrg const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
113010d565efSmrg const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
113110d565efSmrg const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
113210d565efSmrg const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
113310d565efSmrg const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
113410d565efSmrg const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
113510d565efSmrg const char *aes = has_aes ? " -maes" : " -mno-aes";
113610d565efSmrg const char *sha = has_sha ? " -msha" : " -mno-sha";
113710d565efSmrg const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
113810d565efSmrg const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
113910d565efSmrg const char *abm = has_abm ? " -mabm" : " -mno-abm";
114010d565efSmrg const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
114110d565efSmrg const char *fma = has_fma ? " -mfma" : " -mno-fma";
114210d565efSmrg const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
114310d565efSmrg const char *xop = has_xop ? " -mxop" : " -mno-xop";
114410d565efSmrg const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1145c7a68eb7Smrg const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1146c7a68eb7Smrg const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
114710d565efSmrg const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
114810d565efSmrg const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
114910d565efSmrg const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
115010d565efSmrg const char *avx = has_avx ? " -mavx" : " -mno-avx";
115110d565efSmrg const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
115210d565efSmrg const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
115310d565efSmrg const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
115410d565efSmrg const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
115510d565efSmrg const char *hle = has_hle ? " -mhle" : " -mno-hle";
115610d565efSmrg const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
115710d565efSmrg const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
115810d565efSmrg const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
115910d565efSmrg const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
116010d565efSmrg const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
116110d565efSmrg const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
116210d565efSmrg const char *adx = has_adx ? " -madx" : " -mno-adx";
116310d565efSmrg const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
116410d565efSmrg const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
116510d565efSmrg const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
116610d565efSmrg const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
116710d565efSmrg const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
116810d565efSmrg const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
116910d565efSmrg const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
117010d565efSmrg const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
117110d565efSmrg const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
117210d565efSmrg const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
117310d565efSmrg const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
117410d565efSmrg const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
117510d565efSmrg const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
117610d565efSmrg const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
117710d565efSmrg const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
117810d565efSmrg const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
117910d565efSmrg const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1180c7a68eb7Smrg const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1181c7a68eb7Smrg const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
118210d565efSmrg const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
118310d565efSmrg const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
118410d565efSmrg const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
118510d565efSmrg const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
118610d565efSmrg const char *pku = has_pku ? " -mpku" : " -mno-pku";
118710d565efSmrg const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1188c7a68eb7Smrg const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1189c7a68eb7Smrg const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1190c7a68eb7Smrg const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1191c7a68eb7Smrg const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1192*ec02198aSmrg const char *avx512vp2intersect = has_avx512vp2intersect ? " -mavx512vp2intersect" : " -mno-avx512vp2intersect";
1193c7a68eb7Smrg const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1194*ec02198aSmrg const char *avx512vpopcntdq = has_avx512vpopcntdq ? " -mavx512vpopcntdq" : " -mno-avx512vpopcntdq";
1195c7a68eb7Smrg const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1196c7a68eb7Smrg const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1197*ec02198aSmrg const char *enqcmd = has_enqcmd ? " -menqcmd" : " -mno-enqcmd";
11980fc04c29Smrg const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
11990fc04c29Smrg const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
12000fc04c29Smrg const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1201*ec02198aSmrg const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
12020fc04c29Smrg
120310d565efSmrg options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
120410d565efSmrg sse4a, cx16, sahf, movbe, aes, sha, pclmul,
120510d565efSmrg popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1206c7a68eb7Smrg pconfig, wbnoinvd,
120710d565efSmrg tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
120810d565efSmrg hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
120910d565efSmrg fxsr, xsave, xsaveopt, avx512f, avx512er,
121010d565efSmrg avx512cd, avx512pf, prefetchwt1, clflushopt,
121110d565efSmrg xsavec, xsaves, avx512dq, avx512bw, avx512vl,
121210d565efSmrg avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1213c7a68eb7Smrg clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1214c7a68eb7Smrg avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1215*ec02198aSmrg avx512bitalg, avx512vpopcntdq, movdiri, movdir64b,
1216*ec02198aSmrg waitpkg, cldemote, ptwrite, avx512bf16, enqcmd,
1217*ec02198aSmrg avx512vp2intersect, NULL);
121810d565efSmrg }
121910d565efSmrg
122010d565efSmrg done:
122110d565efSmrg return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
122210d565efSmrg }
122310d565efSmrg #else
122410d565efSmrg
/* Fallback definition of host_detect_local_cpu, used when the
   implementation above (the one that includes cpuid.h) is compiled
   out: that is, when the host compiler is not GCC, or is a GCC older
   than version 5 generating PIC code, where the %EBX register is
   fixed.

   ARGC and ARGV are accepted only to match the prototype and are never
   examined.  The function always returns NULL; the driver then ignores
   a "native" -march or -mtune request and leaves it to the newly built
   compiler to generate code for its default target.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  /* Nothing is detected here; the casts only mark both arguments as
     deliberately unused.  */
  (void) argc;
  (void) argv;

  return NULL;
}
123310d565efSmrg #endif /* __GNUC__ */
1234