1 /* Subroutines for the gcc driver. 2 Copyright (C) 2006-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 GCC is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #define IN_TARGET_CODE 1 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 27 const char *host_detect_local_cpu (int argc, const char **argv); 28 29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) 30 #include "cpuid.h" 31 32 struct cache_desc 33 { 34 unsigned sizekb; 35 unsigned assoc; 36 unsigned line; 37 }; 38 39 /* Returns command line parameters that describe size and 40 cache line size of the processor caches. */ 41 42 static char * 43 describe_cache (struct cache_desc level1, struct cache_desc level2) 44 { 45 char size[100], line[100], size2[100]; 46 47 /* At the moment, gcc does not use the information 48 about the associativity of the cache. */ 49 50 snprintf (size, sizeof (size), 51 "--param l1-cache-size=%u ", level1.sizekb); 52 snprintf (line, sizeof (line), 53 "--param l1-cache-line-size=%u ", level1.line); 54 55 snprintf (size2, sizeof (size2), 56 "--param l2-cache-size=%u ", level2.sizekb); 57 58 return concat (size, line, size2, NULL); 59 } 60 61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. 
*/ 62 63 static void 64 detect_l2_cache (struct cache_desc *level2) 65 { 66 unsigned eax, ebx, ecx, edx; 67 unsigned assoc; 68 69 __cpuid (0x80000006, eax, ebx, ecx, edx); 70 71 level2->sizekb = (ecx >> 16) & 0xffff; 72 level2->line = ecx & 0xff; 73 74 assoc = (ecx >> 12) & 0xf; 75 if (assoc == 6) 76 assoc = 8; 77 else if (assoc == 8) 78 assoc = 16; 79 else if (assoc >= 0xa && assoc <= 0xc) 80 assoc = 32 + (assoc - 0xa) * 16; 81 else if (assoc >= 0xd && assoc <= 0xe) 82 assoc = 96 + (assoc - 0xd) * 32; 83 84 level2->assoc = assoc; 85 } 86 87 /* Returns the description of caches for an AMD processor. */ 88 89 static const char * 90 detect_caches_amd (unsigned max_ext_level) 91 { 92 unsigned eax, ebx, ecx, edx; 93 94 struct cache_desc level1, level2 = {0, 0, 0}; 95 96 if (max_ext_level < 0x80000005) 97 return ""; 98 99 __cpuid (0x80000005, eax, ebx, ecx, edx); 100 101 level1.sizekb = (ecx >> 24) & 0xff; 102 level1.assoc = (ecx >> 16) & 0xff; 103 level1.line = ecx & 0xff; 104 105 if (max_ext_level >= 0x80000006) 106 detect_l2_cache (&level2); 107 108 return describe_cache (level1, level2); 109 } 110 111 /* Decodes the size, the associativity and the cache line size of 112 L1/L2 caches of an Intel processor. Values are based on 113 "Intel Processor Identification and the CPUID Instruction" 114 [Application Note 485], revision -032, December 2007. 
*/

static void
decode_caches_intel (unsigned reg, bool xeon_mp,
                     struct cache_desc *level1, struct cache_desc *level2)
{
  /* One row per recognised descriptor byte: the byte value, the cache
     level it describes (1 or 2) and the size in KB, associativity and
     line size stored for it.  */
  static const struct
  {
    unsigned char descriptor;
    unsigned char level;
    struct cache_desc desc;
  } known[] =
    {
      { 0x0a, 1, {    8,  2, 32 } },
      { 0x0c, 1, {   16,  4, 32 } },
      { 0x0d, 1, {   16,  4, 64 } },
      { 0x0e, 1, {   24,  6, 64 } },
      { 0x21, 2, {  256,  8, 64 } },
      { 0x24, 2, { 1024, 16, 64 } },
      { 0x2c, 1, {   32,  8, 64 } },
      { 0x39, 2, {  128,  4, 64 } },
      { 0x3a, 2, {  192,  6, 64 } },
      { 0x3b, 2, {  128,  2, 64 } },
      { 0x3c, 2, {  256,  4, 64 } },
      { 0x3d, 2, {  384,  6, 64 } },
      { 0x3e, 2, {  512,  4, 64 } },
      { 0x41, 2, {  128,  4, 32 } },
      { 0x42, 2, {  256,  4, 32 } },
      { 0x43, 2, {  512,  4, 32 } },
      { 0x44, 2, { 1024,  4, 32 } },
      { 0x45, 2, { 2048,  4, 32 } },
      { 0x48, 2, { 3072, 12, 64 } },
      { 0x49, 2, { 4096, 16, 64 } },
      { 0x4e, 2, { 6144, 24, 64 } },
      { 0x60, 1, {   16,  8, 64 } },
      { 0x66, 1, {    8,  4, 64 } },
      { 0x67, 1, {   16,  4, 64 } },
      { 0x68, 1, {   32,  4, 64 } },
      { 0x78, 2, { 1024,  4, 64 } },
      { 0x79, 2, {  128,  8, 64 } },
      { 0x7a, 2, {  256,  8, 64 } },
      { 0x7b, 2, {  512,  8, 64 } },
      { 0x7c, 2, { 1024,  8, 64 } },
      { 0x7d, 2, { 2048,  8, 64 } },
      { 0x7f, 2, {  512,  2, 64 } },
      { 0x80, 2, {  512,  8, 64 } },
      { 0x82, 2, {  256,  8, 32 } },
      { 0x83, 2, {  512,  8, 32 } },
      { 0x84, 2, { 1024,  8, 32 } },
      { 0x85, 2, { 2048,  8, 32 } },
      { 0x86, 2, {  512,  4, 64 } },
      { 0x87, 2, { 1024,  8, 64 } }
    };
  const unsigned n_known = sizeof (known) / sizeof (known[0]);
  int shift;

  /* Walk the four descriptor bytes of REG from the most significant
     to the least significant one; a later byte for the same level
     overwrites what an earlier byte stored.  */
  for (shift = 24; shift >= 0; shift -= 8)
    {
      unsigned byte = (reg >> shift) & 0xff;
      unsigned k;

      /* Descriptor 0x49 is ignored when XEON_MP is set.  */
      if (byte == 0x49 && xeon_mp)
        continue;

      for (k = 0; k < n_known; k++)
        if (known[k].descriptor == byte)
          {
            if (known[k].level == 1)
              *level1 = known[k].desc;
            else
              *level2 = known[k].desc;
            break;
          }

      /* Bytes that are not in the table leave both levels untouched.  */
    }
}

/* Detect cache parameters using CPUID function 2.
*/ 250 251 static void 252 detect_caches_cpuid2 (bool xeon_mp, 253 struct cache_desc *level1, struct cache_desc *level2) 254 { 255 unsigned regs[4]; 256 int nreps, i; 257 258 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 259 260 nreps = regs[0] & 0x0f; 261 regs[0] &= ~0x0f; 262 263 while (--nreps >= 0) 264 { 265 for (i = 0; i < 4; i++) 266 if (regs[i] && !((regs[i] >> 31) & 1)) 267 decode_caches_intel (regs[i], xeon_mp, level1, level2); 268 269 if (nreps) 270 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 271 } 272 } 273 274 /* Detect cache parameters using CPUID function 4. This 275 method doesn't require hardcoded tables. */ 276 277 enum cache_type 278 { 279 CACHE_END = 0, 280 CACHE_DATA = 1, 281 CACHE_INST = 2, 282 CACHE_UNIFIED = 3 283 }; 284 285 static void 286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, 287 struct cache_desc *level3) 288 { 289 struct cache_desc *cache; 290 291 unsigned eax, ebx, ecx, edx; 292 int count; 293 294 for (count = 0;; count++) 295 { 296 __cpuid_count(4, count, eax, ebx, ecx, edx); 297 switch (eax & 0x1f) 298 { 299 case CACHE_END: 300 return; 301 case CACHE_DATA: 302 case CACHE_UNIFIED: 303 { 304 switch ((eax >> 5) & 0x07) 305 { 306 case 1: 307 cache = level1; 308 break; 309 case 2: 310 cache = level2; 311 break; 312 case 3: 313 cache = level3; 314 break; 315 default: 316 cache = NULL; 317 } 318 319 if (cache) 320 { 321 unsigned sets = ecx + 1; 322 unsigned part = ((ebx >> 12) & 0x03ff) + 1; 323 324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; 325 cache->line = (ebx & 0x0fff) + 1; 326 327 cache->sizekb = (cache->assoc * part 328 * cache->line * sets) / 1024; 329 } 330 } 331 default: 332 break; 333 } 334 } 335 } 336 337 /* Returns the description of caches for an Intel processor. 
*/

static const char *
detect_caches_intel (bool xeon_mp, unsigned max_level,
                     unsigned max_ext_level, unsigned *l2sizekb)
{
  struct cache_desc l1 = {0, 0, 0};
  struct cache_desc l2 = {0, 0, 0};
  struct cache_desc l3 = {0, 0, 0};

  /* Neither CPUID function 2 nor function 4 is available.  */
  if (max_level < 2)
    return "";

  /* Function 4 is used whenever it is available, function 2 otherwise.  */
  if (max_level >= 4)
    detect_caches_cpuid4 (&l1, &l2, &l3);
  else
    detect_caches_cpuid2 (xeon_mp, &l1, &l2);

  /* No L1 information means no cache options at all; *L2SIZEKB is
     left untouched in that case.  */
  if (l1.sizekb == 0)
    return "";

  /* Let the L3 replace the L2.  This assumes inclusive caches
     and single threaded program for now.  */
  if (l3.sizekb != 0)
    l2 = l3;

  /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
     method if other methods fail to provide L2 cache parameters.  */
  if (l2.sizekb == 0 && max_ext_level >= 0x80000006)
    detect_l2_cache (&l2);

  *l2sizekb = l2.sizekb;

  return describe_cache (l1, l2);
}

/* This will be called by the spec parser in gcc.c when it sees
   a %:local_cpu_detect(args) construct.  Currently it will be called
   with either "arch" or "tune" as argument depending on if -march=native
   or -mtune=native is to be substituted.

   It returns a string containing new command line parameters to be
   put at the place of the above two options, depending on what CPU
   this is executed.  E.g. "-march=k8" on an AMD64 machine
   for -march=native.

   ARGC and ARGV are set depending on the actual arguments given
   in the spec.
*/ 382 383 const char *host_detect_local_cpu (int argc, const char **argv) 384 { 385 enum processor_type processor = PROCESSOR_I386; 386 const char *cpu = "i386"; 387 388 const char *cache = ""; 389 const char *options = ""; 390 391 unsigned int eax, ebx, ecx, edx; 392 393 unsigned int max_level, ext_level; 394 395 unsigned int vendor; 396 unsigned int model, family; 397 398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; 399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; 400 401 /* Extended features */ 402 unsigned int has_lahf_lm = 0, has_sse4a = 0; 403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; 404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; 405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; 406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; 407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; 408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; 409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0; 410 unsigned int has_pconfig = 0, has_wbnoinvd = 0; 411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; 412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; 413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; 414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; 415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; 416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; 417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; 418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; 419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0; 420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0; 421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0; 422 unsigned int has_avx512bitalg = 0; 423 unsigned int has_shstk = 0; 424 unsigned int has_avx512vnni = 0, has_vaes = 0; 425 unsigned int has_vpclmulqdq = 0; 426 unsigned 
int has_movdiri = 0, has_movdir64b = 0; 427 428 bool arch; 429 430 unsigned int l2sizekb = 0; 431 432 if (argc < 1) 433 return NULL; 434 435 arch = !strcmp (argv[0], "arch"); 436 437 if (!arch && strcmp (argv[0], "tune")) 438 return NULL; 439 440 max_level = __get_cpuid_max (0, &vendor); 441 if (max_level < 1) 442 goto done; 443 444 __cpuid (1, eax, ebx, ecx, edx); 445 446 model = (eax >> 4) & 0x0f; 447 family = (eax >> 8) & 0x0f; 448 if (vendor == signature_INTEL_ebx 449 || vendor == signature_AMD_ebx) 450 { 451 unsigned int extended_model, extended_family; 452 453 extended_model = (eax >> 12) & 0xf0; 454 extended_family = (eax >> 20) & 0xff; 455 if (family == 0x0f) 456 { 457 family += extended_family; 458 model += extended_model; 459 } 460 else if (family == 0x06) 461 model += extended_model; 462 } 463 464 has_sse3 = ecx & bit_SSE3; 465 has_ssse3 = ecx & bit_SSSE3; 466 has_sse4_1 = ecx & bit_SSE4_1; 467 has_sse4_2 = ecx & bit_SSE4_2; 468 has_avx = ecx & bit_AVX; 469 has_osxsave = ecx & bit_OSXSAVE; 470 has_cmpxchg16b = ecx & bit_CMPXCHG16B; 471 has_movbe = ecx & bit_MOVBE; 472 has_popcnt = ecx & bit_POPCNT; 473 has_aes = ecx & bit_AES; 474 has_pclmul = ecx & bit_PCLMUL; 475 has_fma = ecx & bit_FMA; 476 has_f16c = ecx & bit_F16C; 477 has_rdrnd = ecx & bit_RDRND; 478 has_xsave = ecx & bit_XSAVE; 479 480 has_cmpxchg8b = edx & bit_CMPXCHG8B; 481 has_cmov = edx & bit_CMOV; 482 has_mmx = edx & bit_MMX; 483 has_fxsr = edx & bit_FXSAVE; 484 has_sse = edx & bit_SSE; 485 has_sse2 = edx & bit_SSE2; 486 487 if (max_level >= 7) 488 { 489 __cpuid_count (7, 0, eax, ebx, ecx, edx); 490 491 has_bmi = ebx & bit_BMI; 492 has_sgx = ebx & bit_SGX; 493 has_hle = ebx & bit_HLE; 494 has_rtm = ebx & bit_RTM; 495 has_avx2 = ebx & bit_AVX2; 496 has_bmi2 = ebx & bit_BMI2; 497 has_fsgsbase = ebx & bit_FSGSBASE; 498 has_rdseed = ebx & bit_RDSEED; 499 has_adx = ebx & bit_ADX; 500 has_avx512f = ebx & bit_AVX512F; 501 has_avx512er = ebx & bit_AVX512ER; 502 has_avx512pf = ebx & bit_AVX512PF; 503 
has_avx512cd = ebx & bit_AVX512CD; 504 has_sha = ebx & bit_SHA; 505 has_clflushopt = ebx & bit_CLFLUSHOPT; 506 has_clwb = ebx & bit_CLWB; 507 has_avx512dq = ebx & bit_AVX512DQ; 508 has_avx512bw = ebx & bit_AVX512BW; 509 has_avx512vl = ebx & bit_AVX512VL; 510 has_avx512ifma = ebx & bit_AVX512IFMA; 511 512 has_prefetchwt1 = ecx & bit_PREFETCHWT1; 513 has_avx512vbmi = ecx & bit_AVX512VBMI; 514 has_pku = ecx & bit_OSPKE; 515 has_avx512vbmi2 = ecx & bit_AVX512VBMI2; 516 has_avx512vnni = ecx & bit_AVX512VNNI; 517 has_rdpid = ecx & bit_RDPID; 518 has_gfni = ecx & bit_GFNI; 519 has_vaes = ecx & bit_VAES; 520 has_vpclmulqdq = ecx & bit_VPCLMULQDQ; 521 has_avx512bitalg = ecx & bit_AVX512BITALG; 522 has_movdiri = ecx & bit_MOVDIRI; 523 has_movdir64b = ecx & bit_MOVDIR64B; 524 525 has_avx5124vnniw = edx & bit_AVX5124VNNIW; 526 has_avx5124fmaps = edx & bit_AVX5124FMAPS; 527 528 has_shstk = ecx & bit_SHSTK; 529 has_pconfig = edx & bit_PCONFIG; 530 } 531 532 if (max_level >= 13) 533 { 534 __cpuid_count (13, 1, eax, ebx, ecx, edx); 535 536 has_xsaveopt = eax & bit_XSAVEOPT; 537 has_xsavec = eax & bit_XSAVEC; 538 has_xsaves = eax & bit_XSAVES; 539 } 540 541 /* Check cpuid level of extended features. 
*/ 542 __cpuid (0x80000000, ext_level, ebx, ecx, edx); 543 544 if (ext_level >= 0x80000001) 545 { 546 __cpuid (0x80000001, eax, ebx, ecx, edx); 547 548 has_lahf_lm = ecx & bit_LAHF_LM; 549 has_sse4a = ecx & bit_SSE4a; 550 has_abm = ecx & bit_ABM; 551 has_lwp = ecx & bit_LWP; 552 has_fma4 = ecx & bit_FMA4; 553 has_xop = ecx & bit_XOP; 554 has_tbm = ecx & bit_TBM; 555 has_lzcnt = ecx & bit_LZCNT; 556 has_prfchw = ecx & bit_PRFCHW; 557 558 has_longmode = edx & bit_LM; 559 has_3dnowp = edx & bit_3DNOWP; 560 has_3dnow = edx & bit_3DNOW; 561 has_mwaitx = ecx & bit_MWAITX; 562 } 563 564 if (ext_level >= 0x80000008) 565 { 566 __cpuid (0x80000008, eax, ebx, ecx, edx); 567 has_clzero = ebx & bit_CLZERO; 568 has_wbnoinvd = ebx & bit_WBNOINVD; 569 } 570 571 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ 572 #define XCR_XFEATURE_ENABLED_MASK 0x0 573 #define XSTATE_FP 0x1 574 #define XSTATE_SSE 0x2 575 #define XSTATE_YMM 0x4 576 #define XSTATE_OPMASK 0x20 577 #define XSTATE_ZMM 0x40 578 #define XSTATE_HI_ZMM 0x80 579 580 #define XCR_AVX_ENABLED_MASK \ 581 (XSTATE_SSE | XSTATE_YMM) 582 #define XCR_AVX512F_ENABLED_MASK \ 583 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) 584 585 if (has_osxsave) 586 asm (".byte 0x0f; .byte 0x01; .byte 0xd0" 587 : "=a" (eax), "=d" (edx) 588 : "c" (XCR_XFEATURE_ENABLED_MASK)); 589 else 590 eax = 0; 591 592 /* Check if AVX registers are supported. */ 593 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) 594 { 595 has_avx = 0; 596 has_avx2 = 0; 597 has_fma = 0; 598 has_fma4 = 0; 599 has_f16c = 0; 600 has_xop = 0; 601 has_xsave = 0; 602 has_xsaveopt = 0; 603 has_xsaves = 0; 604 has_xsavec = 0; 605 } 606 607 /* Check if AVX512F registers are supported. 
*/ 608 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) 609 { 610 has_avx512f = 0; 611 has_avx512er = 0; 612 has_avx512pf = 0; 613 has_avx512cd = 0; 614 has_avx512dq = 0; 615 has_avx512bw = 0; 616 has_avx512vl = 0; 617 } 618 619 if (!arch) 620 { 621 if (vendor == signature_AMD_ebx 622 || vendor == signature_CENTAUR_ebx 623 || vendor == signature_CYRIX_ebx 624 || vendor == signature_NSC_ebx) 625 cache = detect_caches_amd (ext_level); 626 else if (vendor == signature_INTEL_ebx) 627 { 628 bool xeon_mp = (family == 15 && model == 6); 629 cache = detect_caches_intel (xeon_mp, max_level, 630 ext_level, &l2sizekb); 631 } 632 } 633 634 if (vendor == signature_AMD_ebx) 635 { 636 unsigned int name; 637 638 /* Detect geode processor by its processor signature. */ 639 if (ext_level >= 0x80000002) 640 __cpuid (0x80000002, name, ebx, ecx, edx); 641 else 642 name = 0; 643 644 if (name == signature_NSC_ebx) 645 processor = PROCESSOR_GEODE; 646 else if (has_movbe && family == 22) 647 processor = PROCESSOR_BTVER2; 648 else if (has_clzero) 649 processor = PROCESSOR_ZNVER1; 650 else if (has_avx2) 651 processor = PROCESSOR_BDVER4; 652 else if (has_xsaveopt) 653 processor = PROCESSOR_BDVER3; 654 else if (has_bmi) 655 processor = PROCESSOR_BDVER2; 656 else if (has_xop) 657 processor = PROCESSOR_BDVER1; 658 else if (has_sse4a && has_ssse3) 659 processor = PROCESSOR_BTVER1; 660 else if (has_sse4a) 661 processor = PROCESSOR_AMDFAM10; 662 else if (has_sse2 || has_longmode) 663 processor = PROCESSOR_K8; 664 else if (has_3dnowp && family == 6) 665 processor = PROCESSOR_ATHLON; 666 else if (has_mmx) 667 processor = PROCESSOR_K6; 668 else 669 processor = PROCESSOR_PENTIUM; 670 } 671 else if (vendor == signature_CENTAUR_ebx) 672 { 673 processor = PROCESSOR_GENERIC; 674 675 switch (family) 676 { 677 default: 678 /* We have no idea. 
*/ 679 break; 680 681 case 5: 682 if (has_3dnow || has_mmx) 683 processor = PROCESSOR_I486; 684 break; 685 686 case 6: 687 if (has_longmode) 688 processor = PROCESSOR_K8; 689 else if (model >= 9) 690 processor = PROCESSOR_PENTIUMPRO; 691 else if (model >= 6) 692 processor = PROCESSOR_I486; 693 } 694 } 695 else 696 { 697 switch (family) 698 { 699 case 4: 700 processor = PROCESSOR_I486; 701 break; 702 case 5: 703 processor = PROCESSOR_PENTIUM; 704 break; 705 case 6: 706 processor = PROCESSOR_PENTIUMPRO; 707 break; 708 case 15: 709 processor = PROCESSOR_PENTIUM4; 710 break; 711 default: 712 /* We have no idea. */ 713 processor = PROCESSOR_GENERIC; 714 } 715 } 716 717 switch (processor) 718 { 719 case PROCESSOR_I386: 720 /* Default. */ 721 break; 722 case PROCESSOR_I486: 723 if (arch && vendor == signature_CENTAUR_ebx) 724 { 725 if (model >= 6) 726 cpu = "c3"; 727 else if (has_3dnow) 728 cpu = "winchip2"; 729 else 730 /* Assume WinChip C6. */ 731 cpu = "winchip-c6"; 732 } 733 else 734 cpu = "i486"; 735 break; 736 case PROCESSOR_PENTIUM: 737 if (arch && has_mmx) 738 cpu = "pentium-mmx"; 739 else 740 cpu = "pentium"; 741 break; 742 case PROCESSOR_PENTIUMPRO: 743 switch (model) 744 { 745 case 0x1c: 746 case 0x26: 747 /* Bonnell. */ 748 cpu = "bonnell"; 749 break; 750 case 0x37: 751 case 0x4a: 752 case 0x4d: 753 case 0x5a: 754 case 0x5d: 755 /* Silvermont. */ 756 cpu = "silvermont"; 757 break; 758 case 0x0f: 759 /* Merom. */ 760 case 0x17: 761 case 0x1d: 762 /* Penryn. */ 763 cpu = "core2"; 764 break; 765 case 0x1a: 766 case 0x1e: 767 case 0x1f: 768 case 0x2e: 769 /* Nehalem. */ 770 cpu = "nehalem"; 771 break; 772 case 0x25: 773 case 0x2c: 774 case 0x2f: 775 /* Westmere. */ 776 cpu = "westmere"; 777 break; 778 case 0x2a: 779 case 0x2d: 780 /* Sandy Bridge. */ 781 cpu = "sandybridge"; 782 break; 783 case 0x3a: 784 case 0x3e: 785 /* Ivy Bridge. */ 786 cpu = "ivybridge"; 787 break; 788 case 0x3c: 789 case 0x3f: 790 case 0x45: 791 case 0x46: 792 /* Haswell. 
*/ 793 cpu = "haswell"; 794 break; 795 case 0x3d: 796 case 0x47: 797 case 0x4f: 798 case 0x56: 799 /* Broadwell. */ 800 cpu = "broadwell"; 801 break; 802 case 0x4e: 803 case 0x5e: 804 /* Skylake. */ 805 case 0x8e: 806 case 0x9e: 807 /* Kaby Lake. */ 808 cpu = "skylake"; 809 break; 810 case 0x55: 811 /* Skylake with AVX-512. */ 812 cpu = "skylake-avx512"; 813 break; 814 case 0x57: 815 /* Knights Landing. */ 816 cpu = "knl"; 817 break; 818 case 0x66: 819 /* Cannon Lake. */ 820 cpu = "cannonlake"; 821 break; 822 case 0x85: 823 /* Knights Mill. */ 824 cpu = "knm"; 825 break; 826 default: 827 if (arch) 828 { 829 /* This is unknown family 0x6 CPU. */ 830 /* Assume Ice Lake Server. */ 831 if (has_wbnoinvd) 832 cpu = "icelake-server"; 833 /* Assume Ice Lake. */ 834 else if (has_gfni) 835 cpu = "icelake-client"; 836 /* Assume Cannon Lake. */ 837 else if (has_avx512vbmi) 838 cpu = "cannonlake"; 839 /* Assume Knights Mill. */ 840 else if (has_avx5124vnniw) 841 cpu = "knm"; 842 /* Assume Knights Landing. */ 843 else if (has_avx512er) 844 cpu = "knl"; 845 /* Assume Skylake with AVX-512. */ 846 else if (has_avx512f) 847 cpu = "skylake-avx512"; 848 /* Assume Skylake. */ 849 else if (has_clflushopt) 850 cpu = "skylake"; 851 /* Assume Broadwell. */ 852 else if (has_adx) 853 cpu = "broadwell"; 854 else if (has_avx2) 855 /* Assume Haswell. */ 856 cpu = "haswell"; 857 else if (has_avx) 858 /* Assume Sandy Bridge. */ 859 cpu = "sandybridge"; 860 else if (has_sse4_2) 861 { 862 if (has_movbe) 863 /* Assume Silvermont. */ 864 cpu = "silvermont"; 865 else 866 /* Assume Nehalem. */ 867 cpu = "nehalem"; 868 } 869 else if (has_ssse3) 870 { 871 if (has_movbe) 872 /* Assume Bonnell. */ 873 cpu = "bonnell"; 874 else 875 /* Assume Core 2. */ 876 cpu = "core2"; 877 } 878 else if (has_longmode) 879 /* Perhaps some emulator? Assume x86-64, otherwise gcc 880 -march=native would be unusable for 64-bit compilations, 881 as all the CPUs below are 32-bit only. 
*/ 882 cpu = "x86-64"; 883 else if (has_sse3) 884 { 885 if (vendor == signature_CENTAUR_ebx) 886 /* C7 / Eden "Esther" */ 887 cpu = "c7"; 888 else 889 /* It is Core Duo. */ 890 cpu = "pentium-m"; 891 } 892 else if (has_sse2) 893 /* It is Pentium M. */ 894 cpu = "pentium-m"; 895 else if (has_sse) 896 { 897 if (vendor == signature_CENTAUR_ebx) 898 { 899 if (model >= 9) 900 /* Eden "Nehemiah" */ 901 cpu = "nehemiah"; 902 else 903 cpu = "c3-2"; 904 } 905 else 906 /* It is Pentium III. */ 907 cpu = "pentium3"; 908 } 909 else if (has_mmx) 910 /* It is Pentium II. */ 911 cpu = "pentium2"; 912 else 913 /* Default to Pentium Pro. */ 914 cpu = "pentiumpro"; 915 } 916 else 917 /* For -mtune, we default to -mtune=generic. */ 918 cpu = "generic"; 919 break; 920 } 921 break; 922 case PROCESSOR_PENTIUM4: 923 if (has_sse3) 924 { 925 if (has_longmode) 926 cpu = "nocona"; 927 else 928 cpu = "prescott"; 929 } 930 else 931 cpu = "pentium4"; 932 break; 933 case PROCESSOR_GEODE: 934 cpu = "geode"; 935 break; 936 case PROCESSOR_K6: 937 if (arch && has_3dnow) 938 cpu = "k6-3"; 939 else 940 cpu = "k6"; 941 break; 942 case PROCESSOR_ATHLON: 943 if (arch && has_sse) 944 cpu = "athlon-4"; 945 else 946 cpu = "athlon"; 947 break; 948 case PROCESSOR_K8: 949 if (arch) 950 { 951 if (vendor == signature_CENTAUR_ebx) 952 { 953 if (has_sse4_1) 954 /* Nano 3000 | Nano dual / quad core | Eden X4 */ 955 cpu = "nano-3000"; 956 else if (has_ssse3) 957 /* Nano 1000 | Nano 2000 */ 958 cpu = "nano"; 959 else if (has_sse3) 960 /* Eden X2 */ 961 cpu = "eden-x2"; 962 else 963 /* Default to k8 */ 964 cpu = "k8"; 965 } 966 else if (has_sse3) 967 cpu = "k8-sse3"; 968 else 969 cpu = "k8"; 970 } 971 else 972 /* For -mtune, we default to -mtune=k8 */ 973 cpu = "k8"; 974 break; 975 case PROCESSOR_AMDFAM10: 976 cpu = "amdfam10"; 977 break; 978 case PROCESSOR_BDVER1: 979 cpu = "bdver1"; 980 break; 981 case PROCESSOR_BDVER2: 982 cpu = "bdver2"; 983 break; 984 case PROCESSOR_BDVER3: 985 cpu = "bdver3"; 986 break; 987 case 
PROCESSOR_BDVER4: 988 cpu = "bdver4"; 989 break; 990 case PROCESSOR_ZNVER1: 991 cpu = "znver1"; 992 break; 993 case PROCESSOR_BTVER1: 994 cpu = "btver1"; 995 break; 996 case PROCESSOR_BTVER2: 997 cpu = "btver2"; 998 break; 999 1000 default: 1001 /* Use something reasonable. */ 1002 if (arch) 1003 { 1004 if (has_ssse3) 1005 cpu = "core2"; 1006 else if (has_sse3) 1007 { 1008 if (has_longmode) 1009 cpu = "nocona"; 1010 else 1011 cpu = "prescott"; 1012 } 1013 else if (has_longmode) 1014 /* Perhaps some emulator? Assume x86-64, otherwise gcc 1015 -march=native would be unusable for 64-bit compilations, 1016 as all the CPUs below are 32-bit only. */ 1017 cpu = "x86-64"; 1018 else if (has_sse2) 1019 cpu = "pentium4"; 1020 else if (has_cmov) 1021 cpu = "pentiumpro"; 1022 else if (has_mmx) 1023 cpu = "pentium-mmx"; 1024 else if (has_cmpxchg8b) 1025 cpu = "pentium"; 1026 } 1027 else 1028 cpu = "generic"; 1029 } 1030 1031 if (arch) 1032 { 1033 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx"; 1034 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow"; 1035 const char *sse = has_sse ? " -msse" : " -mno-sse"; 1036 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2"; 1037 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3"; 1038 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3"; 1039 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a"; 1040 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16"; 1041 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf"; 1042 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe"; 1043 const char *aes = has_aes ? " -maes" : " -mno-aes"; 1044 const char *sha = has_sha ? " -msha" : " -mno-sha"; 1045 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul"; 1046 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt"; 1047 const char *abm = has_abm ? " -mabm" : " -mno-abm"; 1048 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp"; 1049 const char *fma = has_fma ? 
" -mfma" : " -mno-fma"; 1050 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; 1051 const char *xop = has_xop ? " -mxop" : " -mno-xop"; 1052 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; 1053 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig"; 1054 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd"; 1055 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx"; 1056 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; 1057 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; 1058 const char *avx = has_avx ? " -mavx" : " -mno-avx"; 1059 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; 1060 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2"; 1061 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1"; 1062 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; 1063 const char *hle = has_hle ? " -mhle" : " -mno-hle"; 1064 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm"; 1065 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd"; 1066 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c"; 1067 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase"; 1068 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed"; 1069 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw"; 1070 const char *adx = has_adx ? " -madx" : " -mno-adx"; 1071 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; 1072 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; 1073 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; 1074 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; 1075 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; 1076 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; 1077 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf"; 1078 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1"; 1079 const char *clflushopt = has_clflushopt ? 
" -mclflushopt" : " -mno-clflushopt"; 1080 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec"; 1081 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves"; 1082 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq"; 1083 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw"; 1084 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl"; 1085 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma"; 1086 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi"; 1087 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw"; 1088 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2"; 1089 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni"; 1090 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps"; 1091 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; 1092 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx"; 1093 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero"; 1094 const char *pku = has_pku ? " -mpku" : " -mno-pku"; 1095 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid"; 1096 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni"; 1097 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; 1098 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; 1099 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; 1100 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg"; 1101 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri"; 1102 const char *movdir64b = has_movdir64b ? 
" -mmovdir64b" : " -mno-movdir64b"; 1103 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, 1104 sse4a, cx16, sahf, movbe, aes, sha, pclmul, 1105 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, 1106 pconfig, wbnoinvd, 1107 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, 1108 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, 1109 fxsr, xsave, xsaveopt, avx512f, avx512er, 1110 avx512cd, avx512pf, prefetchwt1, clflushopt, 1111 xsavec, xsaves, avx512dq, avx512bw, avx512vl, 1112 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, 1113 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk, 1114 avx512vbmi2, avx512vnni, vaes, vpclmulqdq, 1115 avx512bitalg, movdiri, movdir64b, NULL); 1116 } 1117 1118 done: 1119 return concat (cache, "-m", argv[0], "=", cpu, options, NULL); 1120 } 1121 #else 1122 1123 /* If we are compiling with GCC where %EBX register is fixed, then the 1124 driver will just ignore -march and -mtune "native" target and will leave 1125 to the newly built compiler to generate code for its default target. */ 1126 1127 const char *host_detect_local_cpu (int, const char **) 1128 { 1129 return NULL; 1130 } 1131 #endif /* __GNUC__ */ 1132