1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53
54 #define DEBUG_TYPE "host-detection"
55
56 //===----------------------------------------------------------------------===//
57 //
58 // Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61
62 using namespace llvm;
63
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68 if (std::error_code EC = Text.getError()) {
69 llvm::errs() << "Can't read "
70 << "/proc/cpuinfo: " << EC.message() << "\n";
71 return nullptr;
72 }
73 return std::move(*Text);
74 }
75
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77 // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78 // and so we must use an operating-system interface to determine the current
79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80 const char *generic = "generic";
81
82 // The cpu line is second (after the 'processor: 0' line), so if this
83 // buffer is too small then something has changed (or is wrong).
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86
87 StringRef::const_iterator CIP = CPUInfoStart;
88
89 StringRef::const_iterator CPUStart = nullptr;
90 size_t CPULen = 0;
91
92 // We need to find the first line which starts with cpu, spaces, and a colon.
93 // After the colon, there may be some additional spaces and then the cpu type.
94 while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95 if (CIP < CPUInfoEnd && *CIP == '\n')
96 ++CIP;
97
98 if (CIP < CPUInfoEnd && *CIP == 'c') {
99 ++CIP;
100 if (CIP < CPUInfoEnd && *CIP == 'p') {
101 ++CIP;
102 if (CIP < CPUInfoEnd && *CIP == 'u') {
103 ++CIP;
104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105 ++CIP;
106
107 if (CIP < CPUInfoEnd && *CIP == ':') {
108 ++CIP;
109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110 ++CIP;
111
112 if (CIP < CPUInfoEnd) {
113 CPUStart = CIP;
114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115 *CIP != ',' && *CIP != '\n'))
116 ++CIP;
117 CPULen = CIP - CPUStart;
118 }
119 }
120 }
121 }
122 }
123
124 if (CPUStart == nullptr)
125 while (CIP < CPUInfoEnd && *CIP != '\n')
126 ++CIP;
127 }
128
129 if (CPUStart == nullptr)
130 return generic;
131
132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133 .Case("604e", "604e")
134 .Case("604", "604")
135 .Case("7400", "7400")
136 .Case("7410", "7400")
137 .Case("7447", "7400")
138 .Case("7455", "7450")
139 .Case("G4", "g4")
140 .Case("POWER4", "970")
141 .Case("PPC970FX", "970")
142 .Case("PPC970MP", "970")
143 .Case("G5", "g5")
144 .Case("POWER5", "g5")
145 .Case("A2", "a2")
146 .Case("POWER6", "pwr6")
147 .Case("POWER7", "pwr7")
148 .Case("POWER8", "pwr8")
149 .Case("POWER8E", "pwr8")
150 .Case("POWER8NVL", "pwr8")
151 .Case("POWER9", "pwr9")
152 .Case("POWER10", "pwr10")
153 // FIXME: If we get a simulator or machine with the capabilities of
154 // mcpu=future, we should revisit this and add the name reported by the
155 // simulator/machine.
156 .Default(generic);
157 }
158
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160 // The cpuid register on arm is not accessible from user space. On Linux,
161 // it is exposed through the /proc/cpuinfo file.
162
163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164 // in all cases.
165 SmallVector<StringRef, 32> Lines;
166 ProcCpuinfoContent.split(Lines, "\n");
167
168 // Look for the CPU implementer line.
169 StringRef Implementer;
170 StringRef Hardware;
171 StringRef Part;
172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173 if (Lines[I].starts_with("CPU implementer"))
174 Implementer = Lines[I].substr(15).ltrim("\t :");
175 if (Lines[I].starts_with("Hardware"))
176 Hardware = Lines[I].substr(8).ltrim("\t :");
177 if (Lines[I].starts_with("CPU part"))
178 Part = Lines[I].substr(8).ltrim("\t :");
179 }
180
181 if (Implementer == "0x41") { // ARM Ltd.
182 // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184 if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
185 return "cortex-a53";
186
187
188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189 // values correspond to the "Part number" in the CP15/c0 register. The
190 // contents are specified in the various processor manuals.
191 // This corresponds to the Main ID Register in Technical Reference Manuals.
192 // and is used in programs like sys-utils
193 return StringSwitch<const char *>(Part)
194 .Case("0x926", "arm926ej-s")
195 .Case("0xb02", "mpcore")
196 .Case("0xb36", "arm1136j-s")
197 .Case("0xb56", "arm1156t2-s")
198 .Case("0xb76", "arm1176jz-s")
199 .Case("0xc08", "cortex-a8")
200 .Case("0xc09", "cortex-a9")
201 .Case("0xc0f", "cortex-a15")
202 .Case("0xc20", "cortex-m0")
203 .Case("0xc23", "cortex-m3")
204 .Case("0xc24", "cortex-m4")
205 .Case("0xd24", "cortex-m52")
206 .Case("0xd22", "cortex-m55")
207 .Case("0xd02", "cortex-a34")
208 .Case("0xd04", "cortex-a35")
209 .Case("0xd03", "cortex-a53")
210 .Case("0xd05", "cortex-a55")
211 .Case("0xd46", "cortex-a510")
212 .Case("0xd80", "cortex-a520")
213 .Case("0xd07", "cortex-a57")
214 .Case("0xd08", "cortex-a72")
215 .Case("0xd09", "cortex-a73")
216 .Case("0xd0a", "cortex-a75")
217 .Case("0xd0b", "cortex-a76")
218 .Case("0xd0d", "cortex-a77")
219 .Case("0xd41", "cortex-a78")
220 .Case("0xd47", "cortex-a710")
221 .Case("0xd4d", "cortex-a715")
222 .Case("0xd81", "cortex-a720")
223 .Case("0xd44", "cortex-x1")
224 .Case("0xd4c", "cortex-x1c")
225 .Case("0xd48", "cortex-x2")
226 .Case("0xd4e", "cortex-x3")
227 .Case("0xd82", "cortex-x4")
228 .Case("0xd0c", "neoverse-n1")
229 .Case("0xd49", "neoverse-n2")
230 .Case("0xd40", "neoverse-v1")
231 .Case("0xd4f", "neoverse-v2")
232 .Default("generic");
233 }
234
235 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
236 return StringSwitch<const char *>(Part)
237 .Case("0x516", "thunderx2t99")
238 .Case("0x0516", "thunderx2t99")
239 .Case("0xaf", "thunderx2t99")
240 .Case("0x0af", "thunderx2t99")
241 .Case("0xa1", "thunderxt88")
242 .Case("0x0a1", "thunderxt88")
243 .Default("generic");
244 }
245
246 if (Implementer == "0x46") { // Fujitsu Ltd.
247 return StringSwitch<const char *>(Part)
248 .Case("0x001", "a64fx")
249 .Default("generic");
250 }
251
252 if (Implementer == "0x4e") { // NVIDIA Corporation
253 return StringSwitch<const char *>(Part)
254 .Case("0x004", "carmel")
255 .Default("generic");
256 }
257
258 if (Implementer == "0x48") // HiSilicon Technologies, Inc.
259 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
260 // values correspond to the "Part number" in the CP15/c0 register. The
261 // contents are specified in the various processor manuals.
262 return StringSwitch<const char *>(Part)
263 .Case("0xd01", "tsv110")
264 .Default("generic");
265
266 if (Implementer == "0x51") // Qualcomm Technologies, Inc.
267 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
268 // values correspond to the "Part number" in the CP15/c0 register. The
269 // contents are specified in the various processor manuals.
270 return StringSwitch<const char *>(Part)
271 .Case("0x06f", "krait") // APQ8064
272 .Case("0x201", "kryo")
273 .Case("0x205", "kryo")
274 .Case("0x211", "kryo")
275 .Case("0x800", "cortex-a73") // Kryo 2xx Gold
276 .Case("0x801", "cortex-a73") // Kryo 2xx Silver
277 .Case("0x802", "cortex-a75") // Kryo 3xx Gold
278 .Case("0x803", "cortex-a75") // Kryo 3xx Silver
279 .Case("0x804", "cortex-a76") // Kryo 4xx Gold
280 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
281 .Case("0xc00", "falkor")
282 .Case("0xc01", "saphira")
283 .Default("generic");
284 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
285 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
286 // any predictive pattern across variants and parts.
287 unsigned Variant = 0, Part = 0;
288
289 // Look for the CPU variant line, whose value is a 1 digit hexadecimal
290 // number, corresponding to the Variant bits in the CP15/C0 register.
291 for (auto I : Lines)
292 if (I.consume_front("CPU variant"))
293 I.ltrim("\t :").getAsInteger(0, Variant);
294
295 // Look for the CPU part line, whose value is a 3 digit hexadecimal
296 // number, corresponding to the PartNum bits in the CP15/C0 register.
297 for (auto I : Lines)
298 if (I.consume_front("CPU part"))
299 I.ltrim("\t :").getAsInteger(0, Part);
300
301 unsigned Exynos = (Variant << 12) | Part;
302 switch (Exynos) {
303 default:
304 // Default by falling through to Exynos M3.
305 [[fallthrough]];
306 case 0x1002:
307 return "exynos-m3";
308 case 0x1003:
309 return "exynos-m4";
310 }
311 }
312
313 if (Implementer == "0x6d") { // Microsoft Corporation.
314 // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
315 return StringSwitch<const char *>(Part)
316 .Case("0xd49", "neoverse-n2")
317 .Default("generic");
318 }
319
320 if (Implementer == "0xc0") { // Ampere Computing
321 return StringSwitch<const char *>(Part)
322 .Case("0xac3", "ampere1")
323 .Case("0xac4", "ampere1a")
324 .Case("0xac5", "ampere1b")
325 .Default("generic");
326 }
327
328 return "generic";
329 }
330
331 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)332 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
333 switch (Id) {
334 case 2064: // z900 not supported by LLVM
335 case 2066:
336 case 2084: // z990 not supported by LLVM
337 case 2086:
338 case 2094: // z9-109 not supported by LLVM
339 case 2096:
340 return "generic";
341 case 2097:
342 case 2098:
343 return "z10";
344 case 2817:
345 case 2818:
346 return "z196";
347 case 2827:
348 case 2828:
349 return "zEC12";
350 case 2964:
351 case 2965:
352 return HaveVectorSupport? "z13" : "zEC12";
353 case 3906:
354 case 3907:
355 return HaveVectorSupport? "z14" : "zEC12";
356 case 8561:
357 case 8562:
358 return HaveVectorSupport? "z15" : "zEC12";
359 case 3931:
360 case 3932:
361 default:
362 return HaveVectorSupport? "z16" : "zEC12";
363 }
364 }
365 } // end anonymous namespace
366
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)367 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
368 // STIDP is a privileged operation, so use /proc/cpuinfo instead.
369
370 // The "processor 0:" line comes after a fair amount of other information,
371 // including a cache breakdown, but this should be plenty.
372 SmallVector<StringRef, 32> Lines;
373 ProcCpuinfoContent.split(Lines, "\n");
374
375 // Look for the CPU features.
376 SmallVector<StringRef, 32> CPUFeatures;
377 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
378 if (Lines[I].starts_with("features")) {
379 size_t Pos = Lines[I].find(':');
380 if (Pos != StringRef::npos) {
381 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
382 break;
383 }
384 }
385
386 // We need to check for the presence of vector support independently of
387 // the machine type, since we may only use the vector register set when
388 // supported by the kernel (and hypervisor).
389 bool HaveVectorSupport = false;
390 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
391 if (CPUFeatures[I] == "vx")
392 HaveVectorSupport = true;
393 }
394
395 // Now check the processor machine type.
396 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
397 if (Lines[I].starts_with("processor ")) {
398 size_t Pos = Lines[I].find("machine = ");
399 if (Pos != StringRef::npos) {
400 Pos += sizeof("machine = ") - 1;
401 unsigned int Id;
402 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
403 return getCPUNameFromS390Model(Id, HaveVectorSupport);
404 }
405 break;
406 }
407 }
408
409 return "generic";
410 }
411
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)412 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
413 // There are 24 lines in /proc/cpuinfo
414 SmallVector<StringRef> Lines;
415 ProcCpuinfoContent.split(Lines, "\n");
416
417 // Look for uarch line to determine cpu name
418 StringRef UArch;
419 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
420 if (Lines[I].starts_with("uarch")) {
421 UArch = Lines[I].substr(5).ltrim("\t :");
422 break;
423 }
424 }
425
426 return StringSwitch<const char *>(UArch)
427 .Case("sifive,u74-mc", "sifive-u74")
428 .Case("sifive,bullet0", "sifive-u74")
429 .Default("generic");
430 }
431
getHostCPUNameForBPF()432 StringRef sys::detail::getHostCPUNameForBPF() {
433 #if !defined(__linux__) || !defined(__x86_64__)
434 return "generic";
435 #else
436 uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
437 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445 /* BPF_EXIT_INSN() */
446 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
447
448 uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
449 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
450 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
451 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
452 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
453 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
454 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
455 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
456 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
457 /* BPF_EXIT_INSN() */
458 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
459
460 struct bpf_prog_load_attr {
461 uint32_t prog_type;
462 uint32_t insn_cnt;
463 uint64_t insns;
464 uint64_t license;
465 uint32_t log_level;
466 uint32_t log_size;
467 uint64_t log_buf;
468 uint32_t kern_version;
469 uint32_t prog_flags;
470 } attr = {};
471 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
472 attr.insn_cnt = 5;
473 attr.insns = (uint64_t)v3_insns;
474 attr.license = (uint64_t)"DUMMY";
475
476 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
477 sizeof(attr));
478 if (fd >= 0) {
479 close(fd);
480 return "v3";
481 }
482
483 /* Clear the whole attr in case its content changed by syscall. */
484 memset(&attr, 0, sizeof(attr));
485 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
486 attr.insn_cnt = 5;
487 attr.insns = (uint64_t)v2_insns;
488 attr.license = (uint64_t)"DUMMY";
489 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
490 if (fd >= 0) {
491 close(fd);
492 return "v2";
493 }
494 return "v1";
495 #endif
496 }
497
498 #if defined(__i386__) || defined(_M_IX86) || \
499 defined(__x86_64__) || defined(_M_X64)
500
// The i386 check below was copied from clang's cpuid.h (__get_cpuid_max).
// It was motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
//
/// \returns false only when built for i386 with a GNU-compatible compiler and
/// the eflags bit 0x00200000 cannot be toggled; true in every other case.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Flip bit 0x00200000 of eflags, read eflags back, and compare with the
  // value seen before the flip. The result is 0 when the bit did not change.
  int FlagToggled;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  return FlagToggled != 0;
#else
  return true;
#endif
}
535
/// getX86CpuIDAndInfo - Execute cpuid for leaf \p value and store the four
/// result registers through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, true when this build has no way
/// of running cpuid (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same dance as above with the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // Neither inline assembly for an x86 target nor the MSVC intrinsic exists.
  return true;
#endif
}
573
574 namespace llvm {
575 namespace sys {
576 namespace detail {
577 namespace x86 {
578
getVendorSignature(unsigned * MaxLeaf)579 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
580 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
581 if (MaxLeaf == nullptr)
582 MaxLeaf = &EAX;
583 else
584 *MaxLeaf = 0;
585
586 if (!isCpuIdSupported())
587 return VendorSignatures::UNKNOWN;
588
589 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
590 return VendorSignatures::UNKNOWN;
591
592 // "Genu ineI ntel"
593 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
594 return VendorSignatures::GENUINE_INTEL;
595
596 // "Auth enti cAMD"
597 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
598 return VendorSignatures::AUTHENTIC_AMD;
599
600 return VendorSignatures::UNKNOWN;
601 }
602
603 } // namespace x86
604 } // namespace detail
605 } // namespace sys
606 } // namespace llvm
607
608 using namespace llvm::sys::detail::x86;
609
/// getX86CpuIDAndInfoEx - Execute cpuid for leaf \p value and sub-leaf
/// \p subleaf and store the four result registers through \p rEAX, \p rEBX,
/// \p rECX and \p rEDX.
///
/// \returns false when cpuid was executed, true when this build has no way
/// of running cpuid (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same dance as above with the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // Neither inline assembly for an x86 target nor the MSVC intrinsic exists.
  return true;
#endif
}
648
/// Read control register 0 (XCR0). Used to detect features such as AVX.
///
/// \param rEAX receives the low 32 bits of the register.
/// \param rEDX receives the high 32 bits of the register.
/// \returns false when the register was read, true when this build has no
/// way of reading it (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
666
/// Decode the family and model numbers from the EAX value of cpuid leaf 1.
///
/// \param EAX the raw register value.
/// \param Family receives bits 8-11; when those bits equal 0xF, the extended
/// family (bits 20-27) is added.
/// \param Model receives bits 4-7; when bits 8-11 equal 6 or 0xF, the
/// extended model (bits 16-19) is added as the upper nibble.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family only counts when the base family is F; the extended
  // model counts when the base family is 6 or F.
  const bool HasExtFamily = BaseFamily == 0xf;
  const bool HasExtModel = BaseFamily == 6 || HasExtFamily;

  *Family = HasExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = HasExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
679
680 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)681 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
682 const unsigned *Features,
683 unsigned *Type, unsigned *Subtype) {
684 auto testFeature = [&](unsigned F) {
685 return (Features[F / 32] & (1U << (F % 32))) != 0;
686 };
687
688 StringRef CPU;
689
690 switch (Family) {
691 case 3:
692 CPU = "i386";
693 break;
694 case 4:
695 CPU = "i486";
696 break;
697 case 5:
698 if (testFeature(X86::FEATURE_MMX)) {
699 CPU = "pentium-mmx";
700 break;
701 }
702 CPU = "pentium";
703 break;
704 case 6:
705 switch (Model) {
706 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
707 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
708 // mobile processor, Intel Core 2 Extreme processor, Intel
709 // Pentium Dual-Core processor, Intel Xeon processor, model
710 // 0Fh. All processors are manufactured using the 65 nm process.
711 case 0x16: // Intel Celeron processor model 16h. All processors are
712 // manufactured using the 65 nm process
713 CPU = "core2";
714 *Type = X86::INTEL_CORE2;
715 break;
716 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
717 // 17h. All processors are manufactured using the 45 nm process.
718 //
719 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
720 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
721 // the 45 nm process.
722 CPU = "penryn";
723 *Type = X86::INTEL_CORE2;
724 break;
725 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
726 // processors are manufactured using the 45 nm process.
727 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
728 // As found in a Summer 2010 model iMac.
729 case 0x1f:
730 case 0x2e: // Nehalem EX
731 CPU = "nehalem";
732 *Type = X86::INTEL_COREI7;
733 *Subtype = X86::INTEL_COREI7_NEHALEM;
734 break;
735 case 0x25: // Intel Core i7, laptop version.
736 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
737 // processors are manufactured using the 32 nm process.
738 case 0x2f: // Westmere EX
739 CPU = "westmere";
740 *Type = X86::INTEL_COREI7;
741 *Subtype = X86::INTEL_COREI7_WESTMERE;
742 break;
743 case 0x2a: // Intel Core i7 processor. All processors are manufactured
744 // using the 32 nm process.
745 case 0x2d:
746 CPU = "sandybridge";
747 *Type = X86::INTEL_COREI7;
748 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
749 break;
750 case 0x3a:
751 case 0x3e: // Ivy Bridge EP
752 CPU = "ivybridge";
753 *Type = X86::INTEL_COREI7;
754 *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
755 break;
756
757 // Haswell:
758 case 0x3c:
759 case 0x3f:
760 case 0x45:
761 case 0x46:
762 CPU = "haswell";
763 *Type = X86::INTEL_COREI7;
764 *Subtype = X86::INTEL_COREI7_HASWELL;
765 break;
766
767 // Broadwell:
768 case 0x3d:
769 case 0x47:
770 case 0x4f:
771 case 0x56:
772 CPU = "broadwell";
773 *Type = X86::INTEL_COREI7;
774 *Subtype = X86::INTEL_COREI7_BROADWELL;
775 break;
776
777 // Skylake:
778 case 0x4e: // Skylake mobile
779 case 0x5e: // Skylake desktop
780 case 0x8e: // Kaby Lake mobile
781 case 0x9e: // Kaby Lake desktop
782 case 0xa5: // Comet Lake-H/S
783 case 0xa6: // Comet Lake-U
784 CPU = "skylake";
785 *Type = X86::INTEL_COREI7;
786 *Subtype = X86::INTEL_COREI7_SKYLAKE;
787 break;
788
789 // Rocketlake:
790 case 0xa7:
791 CPU = "rocketlake";
792 *Type = X86::INTEL_COREI7;
793 *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
794 break;
795
796 // Skylake Xeon:
797 case 0x55:
798 *Type = X86::INTEL_COREI7;
799 if (testFeature(X86::FEATURE_AVX512BF16)) {
800 CPU = "cooperlake";
801 *Subtype = X86::INTEL_COREI7_COOPERLAKE;
802 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
803 CPU = "cascadelake";
804 *Subtype = X86::INTEL_COREI7_CASCADELAKE;
805 } else {
806 CPU = "skylake-avx512";
807 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
808 }
809 break;
810
811 // Cannonlake:
812 case 0x66:
813 CPU = "cannonlake";
814 *Type = X86::INTEL_COREI7;
815 *Subtype = X86::INTEL_COREI7_CANNONLAKE;
816 break;
817
818 // Icelake:
819 case 0x7d:
820 case 0x7e:
821 CPU = "icelake-client";
822 *Type = X86::INTEL_COREI7;
823 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
824 break;
825
826 // Tigerlake:
827 case 0x8c:
828 case 0x8d:
829 CPU = "tigerlake";
830 *Type = X86::INTEL_COREI7;
831 *Subtype = X86::INTEL_COREI7_TIGERLAKE;
832 break;
833
834 // Alderlake:
835 case 0x97:
836 case 0x9a:
837 // Gracemont
838 case 0xbe:
839 // Raptorlake:
840 case 0xb7:
841 case 0xba:
842 case 0xbf:
843 // Meteorlake:
844 case 0xaa:
845 case 0xac:
846 CPU = "alderlake";
847 *Type = X86::INTEL_COREI7;
848 *Subtype = X86::INTEL_COREI7_ALDERLAKE;
849 break;
850
851 // Arrowlake:
852 case 0xc5:
853 CPU = "arrowlake";
854 *Type = X86::INTEL_COREI7;
855 *Subtype = X86::INTEL_COREI7_ARROWLAKE;
856 break;
857
858 // Arrowlake S:
859 case 0xc6:
860 // Lunarlake:
861 case 0xbd:
862 CPU = "arrowlake-s";
863 *Type = X86::INTEL_COREI7;
864 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
865 break;
866
867 // Pantherlake:
868 case 0xcc:
869 CPU = "pantherlake";
870 *Type = X86::INTEL_COREI7;
871 *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
872 break;
873
874 // Graniterapids:
875 case 0xad:
876 CPU = "graniterapids";
877 *Type = X86::INTEL_COREI7;
878 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
879 break;
880
881 // Granite Rapids D:
882 case 0xae:
883 CPU = "graniterapids-d";
884 *Type = X86::INTEL_COREI7;
885 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
886 break;
887
888 // Icelake Xeon:
889 case 0x6a:
890 case 0x6c:
891 CPU = "icelake-server";
892 *Type = X86::INTEL_COREI7;
893 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
894 break;
895
896 // Emerald Rapids:
897 case 0xcf:
898 // Sapphire Rapids:
899 case 0x8f:
900 CPU = "sapphirerapids";
901 *Type = X86::INTEL_COREI7;
902 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
903 break;
904
905 case 0x1c: // Most 45 nm Intel Atom processors
906 case 0x26: // 45 nm Atom Lincroft
907 case 0x27: // 32 nm Atom Medfield
908 case 0x35: // 32 nm Atom Midview
909 case 0x36: // 32 nm Atom Midview
910 CPU = "bonnell";
911 *Type = X86::INTEL_BONNELL;
912 break;
913
914 // Atom Silvermont codes from the Intel software optimization guide.
915 case 0x37:
916 case 0x4a:
917 case 0x4d:
918 case 0x5a:
919 case 0x5d:
920 case 0x4c: // really airmont
921 CPU = "silvermont";
922 *Type = X86::INTEL_SILVERMONT;
923 break;
924 // Goldmont:
925 case 0x5c: // Apollo Lake
926 case 0x5f: // Denverton
927 CPU = "goldmont";
928 *Type = X86::INTEL_GOLDMONT;
929 break;
930 case 0x7a:
931 CPU = "goldmont-plus";
932 *Type = X86::INTEL_GOLDMONT_PLUS;
933 break;
934 case 0x86:
935 case 0x8a: // Lakefield
936 case 0x96: // Elkhart Lake
937 case 0x9c: // Jasper Lake
938 CPU = "tremont";
939 *Type = X86::INTEL_TREMONT;
940 break;
941
942 // Sierraforest:
943 case 0xaf:
944 CPU = "sierraforest";
945 *Type = X86::INTEL_SIERRAFOREST;
946 break;
947
948 // Grandridge:
949 case 0xb6:
950 CPU = "grandridge";
951 *Type = X86::INTEL_GRANDRIDGE;
952 break;
953
954 // Clearwaterforest:
955 case 0xdd:
956 CPU = "clearwaterforest";
957 *Type = X86::INTEL_CLEARWATERFOREST;
958 break;
959
960 // Xeon Phi (Knights Landing + Knights Mill):
961 case 0x57:
962 CPU = "knl";
963 *Type = X86::INTEL_KNL;
964 break;
965 case 0x85:
966 CPU = "knm";
967 *Type = X86::INTEL_KNM;
968 break;
969
970 default: // Unknown family 6 CPU, try to guess.
971 // Don't both with Type/Subtype here, they aren't used by the caller.
972 // They're used above to keep the code in sync with compiler-rt.
973 // TODO detect tigerlake host from model
974 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
975 CPU = "tigerlake";
976 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
977 CPU = "icelake-client";
978 } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
979 CPU = "cannonlake";
980 } else if (testFeature(X86::FEATURE_AVX512BF16)) {
981 CPU = "cooperlake";
982 } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
983 CPU = "cascadelake";
984 } else if (testFeature(X86::FEATURE_AVX512VL)) {
985 CPU = "skylake-avx512";
986 } else if (testFeature(X86::FEATURE_AVX512ER)) {
987 CPU = "knl";
988 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
989 if (testFeature(X86::FEATURE_SHA))
990 CPU = "goldmont";
991 else
992 CPU = "skylake";
993 } else if (testFeature(X86::FEATURE_ADX)) {
994 CPU = "broadwell";
995 } else if (testFeature(X86::FEATURE_AVX2)) {
996 CPU = "haswell";
997 } else if (testFeature(X86::FEATURE_AVX)) {
998 CPU = "sandybridge";
999 } else if (testFeature(X86::FEATURE_SSE4_2)) {
1000 if (testFeature(X86::FEATURE_MOVBE))
1001 CPU = "silvermont";
1002 else
1003 CPU = "nehalem";
1004 } else if (testFeature(X86::FEATURE_SSE4_1)) {
1005 CPU = "penryn";
1006 } else if (testFeature(X86::FEATURE_SSSE3)) {
1007 if (testFeature(X86::FEATURE_MOVBE))
1008 CPU = "bonnell";
1009 else
1010 CPU = "core2";
1011 } else if (testFeature(X86::FEATURE_64BIT)) {
1012 CPU = "core2";
1013 } else if (testFeature(X86::FEATURE_SSE3)) {
1014 CPU = "yonah";
1015 } else if (testFeature(X86::FEATURE_SSE2)) {
1016 CPU = "pentium-m";
1017 } else if (testFeature(X86::FEATURE_SSE)) {
1018 CPU = "pentium3";
1019 } else if (testFeature(X86::FEATURE_MMX)) {
1020 CPU = "pentium2";
1021 } else {
1022 CPU = "pentiumpro";
1023 }
1024 break;
1025 }
1026 break;
1027 case 15: {
1028 if (testFeature(X86::FEATURE_64BIT)) {
1029 CPU = "nocona";
1030 break;
1031 }
1032 if (testFeature(X86::FEATURE_SSE3)) {
1033 CPU = "prescott";
1034 break;
1035 }
1036 CPU = "pentium4";
1037 break;
1038 }
1039 default:
1040 break; // Unknown.
1041 }
1042
1043 return CPU;
1044 }
1045
1046 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)1047 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1048 const unsigned *Features,
1049 unsigned *Type, unsigned *Subtype) {
1050 auto testFeature = [&](unsigned F) {
1051 return (Features[F / 32] & (1U << (F % 32))) != 0;
1052 };
1053
1054 StringRef CPU;
1055
1056 switch (Family) {
1057 case 4:
1058 CPU = "i486";
1059 break;
1060 case 5:
1061 CPU = "pentium";
1062 switch (Model) {
1063 case 6:
1064 case 7:
1065 CPU = "k6";
1066 break;
1067 case 8:
1068 CPU = "k6-2";
1069 break;
1070 case 9:
1071 case 13:
1072 CPU = "k6-3";
1073 break;
1074 case 10:
1075 CPU = "geode";
1076 break;
1077 }
1078 break;
1079 case 6:
1080 if (testFeature(X86::FEATURE_SSE)) {
1081 CPU = "athlon-xp";
1082 break;
1083 }
1084 CPU = "athlon";
1085 break;
1086 case 15:
1087 if (testFeature(X86::FEATURE_SSE3)) {
1088 CPU = "k8-sse3";
1089 break;
1090 }
1091 CPU = "k8";
1092 break;
1093 case 16:
1094 CPU = "amdfam10";
1095 *Type = X86::AMDFAM10H; // "amdfam10"
1096 switch (Model) {
1097 case 2:
1098 *Subtype = X86::AMDFAM10H_BARCELONA;
1099 break;
1100 case 4:
1101 *Subtype = X86::AMDFAM10H_SHANGHAI;
1102 break;
1103 case 8:
1104 *Subtype = X86::AMDFAM10H_ISTANBUL;
1105 break;
1106 }
1107 break;
1108 case 20:
1109 CPU = "btver1";
1110 *Type = X86::AMD_BTVER1;
1111 break;
1112 case 21:
1113 CPU = "bdver1";
1114 *Type = X86::AMDFAM15H;
1115 if (Model >= 0x60 && Model <= 0x7f) {
1116 CPU = "bdver4";
1117 *Subtype = X86::AMDFAM15H_BDVER4;
1118 break; // 60h-7Fh: Excavator
1119 }
1120 if (Model >= 0x30 && Model <= 0x3f) {
1121 CPU = "bdver3";
1122 *Subtype = X86::AMDFAM15H_BDVER3;
1123 break; // 30h-3Fh: Steamroller
1124 }
1125 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1126 CPU = "bdver2";
1127 *Subtype = X86::AMDFAM15H_BDVER2;
1128 break; // 02h, 10h-1Fh: Piledriver
1129 }
1130 if (Model <= 0x0f) {
1131 *Subtype = X86::AMDFAM15H_BDVER1;
1132 break; // 00h-0Fh: Bulldozer
1133 }
1134 break;
1135 case 22:
1136 CPU = "btver2";
1137 *Type = X86::AMD_BTVER2;
1138 break;
1139 case 23:
1140 CPU = "znver1";
1141 *Type = X86::AMDFAM17H;
1142 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
1143 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
1144 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
1145 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
1146 (Model >= 0xa0 && Model <= 0xaf)) {
1147 // Family 17h Models 30h-3Fh (Starship) Zen 2
1148 // Family 17h Models 47h (Cardinal) Zen 2
1149 // Family 17h Models 60h-67h (Renoir) Zen 2
1150 // Family 17h Models 68h-6Fh (Lucienne) Zen 2
1151 // Family 17h Models 70h-7Fh (Matisse) Zen 2
1152 // Family 17h Models 84h-87h (ProjectX) Zen 2
1153 // Family 17h Models 90h-97h (VanGogh) Zen 2
1154 // Family 17h Models 98h-9Fh (Mero) Zen 2
1155 // Family 17h Models A0h-AFh (Mendocino) Zen 2
1156 CPU = "znver2";
1157 *Subtype = X86::AMDFAM17H_ZNVER2;
1158 break;
1159 }
1160 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
1161 // Family 17h Models 10h-1Fh (Raven1) Zen
1162 // Family 17h Models 10h-1Fh (Picasso) Zen+
1163 // Family 17h Models 20h-2Fh (Raven2 x86) Zen
1164 *Subtype = X86::AMDFAM17H_ZNVER1;
1165 break;
1166 }
1167 break;
1168 case 25:
1169 CPU = "znver3";
1170 *Type = X86::AMDFAM19H;
1171 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
1172 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
1173 (Model >= 0x50 && Model <= 0x5f)) {
1174 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
1175 // Family 19h Models 20h-2Fh (Vermeer) Zen 3
1176 // Family 19h Models 30h-3Fh (Badami) Zen 3
1177 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
1178 // Family 19h Models 50h-5Fh (Cezanne) Zen 3
1179 *Subtype = X86::AMDFAM19H_ZNVER3;
1180 break;
1181 }
1182 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
1183 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
1184 (Model >= 0xa0 && Model <= 0xaf)) {
1185 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
1186 // Family 19h Models 60h-6Fh (Raphael) Zen 4
1187 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
1188 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
1189 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
1190 CPU = "znver4";
1191 *Subtype = X86::AMDFAM19H_ZNVER4;
1192 break; // "znver4"
1193 }
1194 break;
1195 default:
1196 break; // Unknown AMD CPU.
1197 }
1198
1199 return CPU;
1200 }
1201
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1202 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1203 unsigned *Features) {
1204 unsigned EAX, EBX;
1205
1206 auto setFeature = [&](unsigned F) {
1207 Features[F / 32] |= 1U << (F % 32);
1208 };
1209
1210 if ((EDX >> 15) & 1)
1211 setFeature(X86::FEATURE_CMOV);
1212 if ((EDX >> 23) & 1)
1213 setFeature(X86::FEATURE_MMX);
1214 if ((EDX >> 25) & 1)
1215 setFeature(X86::FEATURE_SSE);
1216 if ((EDX >> 26) & 1)
1217 setFeature(X86::FEATURE_SSE2);
1218
1219 if ((ECX >> 0) & 1)
1220 setFeature(X86::FEATURE_SSE3);
1221 if ((ECX >> 1) & 1)
1222 setFeature(X86::FEATURE_PCLMUL);
1223 if ((ECX >> 9) & 1)
1224 setFeature(X86::FEATURE_SSSE3);
1225 if ((ECX >> 12) & 1)
1226 setFeature(X86::FEATURE_FMA);
1227 if ((ECX >> 19) & 1)
1228 setFeature(X86::FEATURE_SSE4_1);
1229 if ((ECX >> 20) & 1) {
1230 setFeature(X86::FEATURE_SSE4_2);
1231 setFeature(X86::FEATURE_CRC32);
1232 }
1233 if ((ECX >> 23) & 1)
1234 setFeature(X86::FEATURE_POPCNT);
1235 if ((ECX >> 25) & 1)
1236 setFeature(X86::FEATURE_AES);
1237
1238 if ((ECX >> 22) & 1)
1239 setFeature(X86::FEATURE_MOVBE);
1240
1241 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1242 // indicates that the AVX registers will be saved and restored on context
1243 // switch, then we have full AVX support.
1244 const unsigned AVXBits = (1 << 27) | (1 << 28);
1245 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1246 ((EAX & 0x6) == 0x6);
1247 #if defined(__APPLE__)
1248 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1249 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1250 // set right now.
1251 bool HasAVX512Save = true;
1252 #else
1253 // AVX512 requires additional context to be saved by the OS.
1254 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1255 #endif
1256
1257 if (HasAVX)
1258 setFeature(X86::FEATURE_AVX);
1259
1260 bool HasLeaf7 =
1261 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1262
1263 if (HasLeaf7 && ((EBX >> 3) & 1))
1264 setFeature(X86::FEATURE_BMI);
1265 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1266 setFeature(X86::FEATURE_AVX2);
1267 if (HasLeaf7 && ((EBX >> 8) & 1))
1268 setFeature(X86::FEATURE_BMI2);
1269 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
1270 setFeature(X86::FEATURE_AVX512F);
1271 setFeature(X86::FEATURE_EVEX512);
1272 }
1273 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1274 setFeature(X86::FEATURE_AVX512DQ);
1275 if (HasLeaf7 && ((EBX >> 19) & 1))
1276 setFeature(X86::FEATURE_ADX);
1277 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1278 setFeature(X86::FEATURE_AVX512IFMA);
1279 if (HasLeaf7 && ((EBX >> 23) & 1))
1280 setFeature(X86::FEATURE_CLFLUSHOPT);
1281 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1282 setFeature(X86::FEATURE_AVX512PF);
1283 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1284 setFeature(X86::FEATURE_AVX512ER);
1285 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1286 setFeature(X86::FEATURE_AVX512CD);
1287 if (HasLeaf7 && ((EBX >> 29) & 1))
1288 setFeature(X86::FEATURE_SHA);
1289 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1290 setFeature(X86::FEATURE_AVX512BW);
1291 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1292 setFeature(X86::FEATURE_AVX512VL);
1293
1294 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1295 setFeature(X86::FEATURE_AVX512VBMI);
1296 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1297 setFeature(X86::FEATURE_AVX512VBMI2);
1298 if (HasLeaf7 && ((ECX >> 8) & 1))
1299 setFeature(X86::FEATURE_GFNI);
1300 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1301 setFeature(X86::FEATURE_VPCLMULQDQ);
1302 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1303 setFeature(X86::FEATURE_AVX512VNNI);
1304 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1305 setFeature(X86::FEATURE_AVX512BITALG);
1306 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1307 setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1308
1309 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1310 setFeature(X86::FEATURE_AVX5124VNNIW);
1311 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1312 setFeature(X86::FEATURE_AVX5124FMAPS);
1313 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1314 setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1315
1316 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1317 // return all 0s for invalid subleaves so check the limit.
1318 bool HasLeaf7Subleaf1 =
1319 HasLeaf7 && EAX >= 1 &&
1320 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1321 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1322 setFeature(X86::FEATURE_AVX512BF16);
1323
1324 unsigned MaxExtLevel;
1325 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1326
1327 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1328 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1329 if (HasExtLeaf1 && ((ECX >> 6) & 1))
1330 setFeature(X86::FEATURE_SSE4_A);
1331 if (HasExtLeaf1 && ((ECX >> 11) & 1))
1332 setFeature(X86::FEATURE_XOP);
1333 if (HasExtLeaf1 && ((ECX >> 16) & 1))
1334 setFeature(X86::FEATURE_FMA4);
1335
1336 if (HasExtLeaf1 && ((EDX >> 29) & 1))
1337 setFeature(X86::FEATURE_64BIT);
1338 }
1339
getHostCPUName()1340 StringRef sys::getHostCPUName() {
1341 unsigned MaxLeaf = 0;
1342 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1343 if (Vendor == VendorSignatures::UNKNOWN)
1344 return "generic";
1345
1346 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1347 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1348
1349 unsigned Family = 0, Model = 0;
1350 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1351 detectX86FamilyModel(EAX, &Family, &Model);
1352 getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1353
1354 // These aren't consumed in this file, but we try to keep some source code the
1355 // same or similar to compiler-rt.
1356 unsigned Type = 0;
1357 unsigned Subtype = 0;
1358
1359 StringRef CPU;
1360
1361 if (Vendor == VendorSignatures::GENUINE_INTEL) {
1362 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1363 &Subtype);
1364 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1365 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1366 &Subtype);
1367 }
1368
1369 if (!CPU.empty())
1370 return CPU;
1371
1372 return "generic";
1373 }
1374
1375 #elif defined(__APPLE__) && defined(__powerpc__)
getHostCPUName()1376 StringRef sys::getHostCPUName() {
1377 host_basic_info_data_t hostInfo;
1378 mach_msg_type_number_t infoCount;
1379
1380 infoCount = HOST_BASIC_INFO_COUNT;
1381 mach_port_t hostPort = mach_host_self();
1382 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1383 &infoCount);
1384 mach_port_deallocate(mach_task_self(), hostPort);
1385
1386 if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1387 return "generic";
1388
1389 switch (hostInfo.cpu_subtype) {
1390 case CPU_SUBTYPE_POWERPC_601:
1391 return "601";
1392 case CPU_SUBTYPE_POWERPC_602:
1393 return "602";
1394 case CPU_SUBTYPE_POWERPC_603:
1395 return "603";
1396 case CPU_SUBTYPE_POWERPC_603e:
1397 return "603e";
1398 case CPU_SUBTYPE_POWERPC_603ev:
1399 return "603ev";
1400 case CPU_SUBTYPE_POWERPC_604:
1401 return "604";
1402 case CPU_SUBTYPE_POWERPC_604e:
1403 return "604e";
1404 case CPU_SUBTYPE_POWERPC_620:
1405 return "620";
1406 case CPU_SUBTYPE_POWERPC_750:
1407 return "750";
1408 case CPU_SUBTYPE_POWERPC_7400:
1409 return "7400";
1410 case CPU_SUBTYPE_POWERPC_7450:
1411 return "7450";
1412 case CPU_SUBTYPE_POWERPC_970:
1413 return "970";
1414 default:;
1415 }
1416
1417 return "generic";
1418 }
1419 #elif defined(__linux__) && defined(__powerpc__)
getHostCPUName()1420 StringRef sys::getHostCPUName() {
1421 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1422 StringRef Content = P ? P->getBuffer() : "";
1423 return detail::getHostCPUNameForPowerPC(Content);
1424 }
1425 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1426 StringRef sys::getHostCPUName() {
1427 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1428 StringRef Content = P ? P->getBuffer() : "";
1429 return detail::getHostCPUNameForARM(Content);
1430 }
1431 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1432 StringRef sys::getHostCPUName() {
1433 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1434 StringRef Content = P ? P->getBuffer() : "";
1435 return detail::getHostCPUNameForS390x(Content);
1436 }
1437 #elif defined(__MVS__)
getHostCPUName()1438 StringRef sys::getHostCPUName() {
1439 // Get pointer to Communications Vector Table (CVT).
1440 // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1441 // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1442 int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1443 // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1444 // of address.
1445 int ReadValue = *StartToCVTOffset;
1446 // Explicitly clear the high order bit.
1447 ReadValue = (ReadValue & 0x7FFFFFFF);
1448 char *CVT = reinterpret_cast<char *>(ReadValue);
1449 // The model number is located in the CVT prefix at offset -6 and stored as
1450 // signless packed decimal.
1451 uint16_t Id = *(uint16_t *)&CVT[-6];
1452 // Convert number to integer.
1453 Id = decodePackedBCD<uint16_t>(Id, false);
1454 // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1455 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1456 // extension can only be used if bit CVTVEF is on.
1457 bool HaveVectorSupport = CVT[244] & 0x80;
1458 return getCPUNameFromS390Model(Id, HaveVectorSupport);
1459 }
1460 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
// CPU family identifiers that getHostCPUName() below compares against the
// value read from the "hw.cpufamily" sysctl.
#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
#define CPUFAMILY_ARM_CYCLONE 0x37a09642
#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1470
getHostCPUName()1471 StringRef sys::getHostCPUName() {
1472 uint32_t Family;
1473 size_t Length = sizeof(Family);
1474 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1475
1476 switch (Family) {
1477 case CPUFAMILY_ARM_SWIFT:
1478 return "swift";
1479 case CPUFAMILY_ARM_CYCLONE:
1480 return "apple-a7";
1481 case CPUFAMILY_ARM_TYPHOON:
1482 return "apple-a8";
1483 case CPUFAMILY_ARM_TWISTER:
1484 return "apple-a9";
1485 case CPUFAMILY_ARM_HURRICANE:
1486 return "apple-a10";
1487 case CPUFAMILY_ARM_MONSOON_MISTRAL:
1488 return "apple-a11";
1489 case CPUFAMILY_ARM_VORTEX_TEMPEST:
1490 return "apple-a12";
1491 case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1492 return "apple-a13";
1493 case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1494 return "apple-m1";
1495 default:
1496 // Default to the newest CPU we know about.
1497 return "apple-m1";
1498 }
1499 }
1500 #elif defined(_AIX)
getHostCPUName()1501 StringRef sys::getHostCPUName() {
1502 switch (_system_configuration.implementation) {
1503 case POWER_4:
1504 if (_system_configuration.version == PV_4_3)
1505 return "970";
1506 return "pwr4";
1507 case POWER_5:
1508 if (_system_configuration.version == PV_5)
1509 return "pwr5";
1510 return "pwr5x";
1511 case POWER_6:
1512 if (_system_configuration.version == PV_6_Compat)
1513 return "pwr6";
1514 return "pwr6x";
1515 case POWER_7:
1516 return "pwr7";
1517 case POWER_8:
1518 return "pwr8";
1519 case POWER_9:
1520 return "pwr9";
1521 // TODO: simplify this once the macro is available in all OS levels.
1522 #ifdef POWER_10
1523 case POWER_10:
1524 #else
1525 case 0x40000:
1526 #endif
1527 return "pwr10";
1528 default:
1529 return "generic";
1530 }
1531 }
1532 #elif defined(__loongarch__)
getHostCPUName()1533 StringRef sys::getHostCPUName() {
1534 // Use processor id to detect cpu name.
1535 uint32_t processor_id;
1536 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1537 // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h.
1538 switch (processor_id & 0xf000) {
1539 case 0xc000: // Loongson 64bit, 4-issue
1540 return "la464";
1541 // TODO: Others.
1542 default:
1543 break;
1544 }
1545 return "generic";
1546 }
1547 #elif defined(__riscv)
getHostCPUName()1548 StringRef sys::getHostCPUName() {
1549 #if defined(__linux__)
1550 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1551 StringRef Content = P ? P->getBuffer() : "";
1552 return detail::getHostCPUNameForRISCV(Content);
1553 #else
1554 #if __riscv_xlen == 64
1555 return "generic-rv64";
1556 #elif __riscv_xlen == 32
1557 return "generic-rv32";
1558 #else
1559 #error "Unhandled value of __riscv_xlen"
1560 #endif
1561 #endif
1562 }
1563 #elif defined(__sparc__)
1564 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1565 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1566 SmallVector<StringRef> Lines;
1567 ProcCpuinfoContent.split(Lines, "\n");
1568
1569 // Look for cpu line to determine cpu name
1570 StringRef Cpu;
1571 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1572 if (Lines[I].starts_with("cpu")) {
1573 Cpu = Lines[I].substr(5).ltrim("\t :");
1574 break;
1575 }
1576 }
1577
1578 return StringSwitch<const char *>(Cpu)
1579 .StartsWith("SuperSparc", "supersparc")
1580 .StartsWith("HyperSparc", "hypersparc")
1581 .StartsWith("SpitFire", "ultrasparc")
1582 .StartsWith("BlackBird", "ultrasparc")
1583 .StartsWith("Sabre", " ultrasparc")
1584 .StartsWith("Hummingbird", "ultrasparc")
1585 .StartsWith("Cheetah", "ultrasparc3")
1586 .StartsWith("Jalapeno", "ultrasparc3")
1587 .StartsWith("Jaguar", "ultrasparc3")
1588 .StartsWith("Panther", "ultrasparc3")
1589 .StartsWith("Serrano", "ultrasparc3")
1590 .StartsWith("UltraSparc T1", "niagara")
1591 .StartsWith("UltraSparc T2", "niagara2")
1592 .StartsWith("UltraSparc T3", "niagara3")
1593 .StartsWith("UltraSparc T4", "niagara4")
1594 .StartsWith("UltraSparc T5", "niagara4")
1595 .StartsWith("LEON", "leon3")
1596 // niagara7/m8 not supported by LLVM yet.
1597 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1598 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1599 .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1600 .Default("generic");
1601 }
1602 #endif
1603
getHostCPUName()1604 StringRef sys::getHostCPUName() {
1605 #if defined(__linux__)
1606 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1607 StringRef Content = P ? P->getBuffer() : "";
1608 return detail::getHostCPUNameForSPARC(Content);
1609 #elif defined(__sun__) && defined(__svr4__)
1610 char *buf = NULL;
1611 kstat_ctl_t *kc;
1612 kstat_t *ksp;
1613 kstat_named_t *brand = NULL;
1614
1615 kc = kstat_open();
1616 if (kc != NULL) {
1617 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1618 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1619 ksp->ks_type == KSTAT_TYPE_NAMED)
1620 brand =
1621 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1622 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1623 buf = KSTAT_NAMED_STR_PTR(brand);
1624 }
1625 kstat_close(kc);
1626
1627 return StringSwitch<const char *>(buf)
1628 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1629 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1630 .Case("TMS390Z55",
1631 "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1632 .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1633 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1634 .Case("RT623", "hypersparc") // Ross hyperSPARC
1635 .Case("RT625", "hypersparc")
1636 .Case("RT626", "hypersparc")
1637 .Case("UltraSPARC-I", "ultrasparc")
1638 .Case("UltraSPARC-II", "ultrasparc")
1639 .Case("UltraSPARC-IIe", "ultrasparc")
1640 .Case("UltraSPARC-IIi", "ultrasparc")
1641 .Case("SPARC64-III", "ultrasparc")
1642 .Case("SPARC64-IV", "ultrasparc")
1643 .Case("UltraSPARC-III", "ultrasparc3")
1644 .Case("UltraSPARC-III+", "ultrasparc3")
1645 .Case("UltraSPARC-IIIi", "ultrasparc3")
1646 .Case("UltraSPARC-IIIi+", "ultrasparc3")
1647 .Case("UltraSPARC-IV", "ultrasparc3")
1648 .Case("UltraSPARC-IV+", "ultrasparc3")
1649 .Case("SPARC64-V", "ultrasparc3")
1650 .Case("SPARC64-VI", "ultrasparc3")
1651 .Case("SPARC64-VII", "ultrasparc3")
1652 .Case("UltraSPARC-T1", "niagara")
1653 .Case("UltraSPARC-T2", "niagara2")
1654 .Case("UltraSPARC-T2", "niagara2")
1655 .Case("UltraSPARC-T2+", "niagara2")
1656 .Case("SPARC-T3", "niagara3")
1657 .Case("SPARC-T4", "niagara4")
1658 .Case("SPARC-T5", "niagara4")
1659 // niagara7/m8 not supported by LLVM yet.
1660 .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1661 .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1662 .Case("SPARC-M8", "niagara4" /* "m8" */)
1663 .Default("generic");
1664 #else
1665 return "generic";
1666 #endif
1667 }
1668 #else
getHostCPUName()1669 StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
namespace sys {
namespace detail {
namespace x86 {

/// Stub defined in the final #else branch of the host-detection chain: it
/// always reports an unknown vendor and never writes through \p MaxLeaf.
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
  return VendorSignatures::UNKNOWN;
}

} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm
1683 #endif
1684
1685 #if defined(__i386__) || defined(_M_IX86) || \
1686 defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1687 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1688 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1689 unsigned MaxLevel;
1690
1691 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1692 return false;
1693
1694 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1695
1696 Features["cx8"] = (EDX >> 8) & 1;
1697 Features["cmov"] = (EDX >> 15) & 1;
1698 Features["mmx"] = (EDX >> 23) & 1;
1699 Features["fxsr"] = (EDX >> 24) & 1;
1700 Features["sse"] = (EDX >> 25) & 1;
1701 Features["sse2"] = (EDX >> 26) & 1;
1702
1703 Features["sse3"] = (ECX >> 0) & 1;
1704 Features["pclmul"] = (ECX >> 1) & 1;
1705 Features["ssse3"] = (ECX >> 9) & 1;
1706 Features["cx16"] = (ECX >> 13) & 1;
1707 Features["sse4.1"] = (ECX >> 19) & 1;
1708 Features["sse4.2"] = (ECX >> 20) & 1;
1709 Features["crc32"] = Features["sse4.2"];
1710 Features["movbe"] = (ECX >> 22) & 1;
1711 Features["popcnt"] = (ECX >> 23) & 1;
1712 Features["aes"] = (ECX >> 25) & 1;
1713 Features["rdrnd"] = (ECX >> 30) & 1;
1714
1715 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1716 // indicates that the AVX registers will be saved and restored on context
1717 // switch, then we have full AVX support.
1718 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1719 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1720 #if defined(__APPLE__)
1721 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1722 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1723 // set right now.
1724 bool HasAVX512Save = true;
1725 #else
1726 // AVX512 requires additional context to be saved by the OS.
1727 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1728 #endif
1729 // AMX requires additional context to be saved by the OS.
1730 const unsigned AMXBits = (1 << 17) | (1 << 18);
1731 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1732
1733 Features["avx"] = HasAVXSave;
1734 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
1735 // Only enable XSAVE if OS has enabled support for saving YMM state.
1736 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1737 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;
1738
1739 unsigned MaxExtLevel;
1740 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1741
1742 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1743 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1744 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
1745 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
1746 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
1747 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
1748 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1749 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
1750 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1751 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
1752 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1753
1754 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
1755
1756 // Miscellaneous memory related features, detected by
1757 // using the 0x80000008 leaf of the CPUID instruction
1758 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1759 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1760 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
1761 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
1762 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1763
1764 bool HasLeaf7 =
1765 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1766
1767 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
1768 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
1769 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
1770 // AVX2 is only supported if we have the OS save support from AVX.
1771 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
1772 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
1773 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
1774 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
1775 // AVX512 is only supported if the OS supports the context save for it.
1776 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1777 if (Features["avx512f"])
1778 Features["evex512"] = true;
1779 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1780 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
1781 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
1782 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1783 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1784 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
1785 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1786 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1787 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1788 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
1789 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1790 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1791
1792 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
1793 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
1794 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
1795 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
1796 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
1797 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
1798 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
1799 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
1800 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1801 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1802 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1803 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1804 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
1805 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1806 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
1807 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
1808 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
1809 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
1810
1811 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
1812 Features["avx512vp2intersect"] =
1813 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1814 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
1815 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
1816 // There are two CPUID leafs which information associated with the pconfig
1817 // instruction:
1818 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1819 // bit of EDX), while the EAX=0x1b leaf returns information on the
1820 // availability of specific pconfig leafs.
1821 // The target feature here only refers to the the first of these two.
1822 // Users might need to check for the availability of specific pconfig
1823 // leaves using cpuid, since that information is ignored while
1824 // detecting features using the "-march=native" flag.
1825 // For more info, see X86 ISA docs.
1826 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1827 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1828 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1829 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1830 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1831 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1832 // return all 0s for invalid subleaves so check the limit.
1833 bool HasLeaf7Subleaf1 =
1834 HasLeaf7 && EAX >= 1 &&
1835 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1836 Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1837 Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1838 Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1839 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1840 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1841 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1842 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1843 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1844 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1845 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1846 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1847 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1848 Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1849 Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1850 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1851 Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
1852 Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
1853
1854 bool HasLeafD = MaxLevel >= 0xd &&
1855 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1856
1857 // Only enable XSAVE if OS has enabled support for saving YMM state.
1858 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1859 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1860 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1861
1862 bool HasLeaf14 = MaxLevel >= 0x14 &&
1863 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1864
1865 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1866
1867 bool HasLeaf19 =
1868 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1869 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1870
1871 bool HasLeaf24 =
1872 MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1873 Features["avx10.1-512"] =
1874 Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
1875
1876 return true;
1877 }
1878 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1879 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1880 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1881 if (!P)
1882 return false;
1883
1884 SmallVector<StringRef, 32> Lines;
1885 P->getBuffer().split(Lines, "\n");
1886
1887 SmallVector<StringRef, 32> CPUFeatures;
1888
1889 // Look for the CPU features.
1890 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1891 if (Lines[I].starts_with("Features")) {
1892 Lines[I].split(CPUFeatures, ' ');
1893 break;
1894 }
1895
1896 #if defined(__aarch64__)
1897 // Keep track of which crypto features we have seen
1898 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1899 uint32_t crypto = 0;
1900 #endif
1901
1902 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1903 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1904 #if defined(__aarch64__)
1905 .Case("asimd", "neon")
1906 .Case("fp", "fp-armv8")
1907 .Case("crc32", "crc")
1908 .Case("atomics", "lse")
1909 .Case("sve", "sve")
1910 .Case("sve2", "sve2")
1911 #else
1912 .Case("half", "fp16")
1913 .Case("neon", "neon")
1914 .Case("vfpv3", "vfp3")
1915 .Case("vfpv3d16", "vfp3d16")
1916 .Case("vfpv4", "vfp4")
1917 .Case("idiva", "hwdiv-arm")
1918 .Case("idivt", "hwdiv")
1919 #endif
1920 .Default("");
1921
1922 #if defined(__aarch64__)
1923 // We need to check crypto separately since we need all of the crypto
1924 // extensions to enable the subtarget feature
1925 if (CPUFeatures[I] == "aes")
1926 crypto |= CAP_AES;
1927 else if (CPUFeatures[I] == "pmull")
1928 crypto |= CAP_PMULL;
1929 else if (CPUFeatures[I] == "sha1")
1930 crypto |= CAP_SHA1;
1931 else if (CPUFeatures[I] == "sha2")
1932 crypto |= CAP_SHA2;
1933 #endif
1934
1935 if (LLVMFeatureStr != "")
1936 Features[LLVMFeatureStr] = true;
1937 }
1938
1939 #if defined(__aarch64__)
1940 // If we have all crypto bits we can add the feature
1941 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1942 Features["crypto"] = true;
1943 #endif
1944
1945 return true;
1946 }
1947 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1948 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1949 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1950 Features["neon"] = true;
1951 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1952 Features["crc"] = true;
1953 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1954 Features["crypto"] = true;
1955
1956 return true;
1957 }
1958 #elif defined(__linux__) && defined(__loongarch__)
1959 #include <sys/auxv.h>
getHostCPUFeatures(StringMap<bool> & Features)1960 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1961 unsigned long hwcap = getauxval(AT_HWCAP);
1962 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1963 uint32_t cpucfg2 = 0x2;
1964 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1965
1966 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1967 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1968
1969 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX
1970 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1971 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ
1972
1973 return true;
1974 }
1975 #else
getHostCPUFeatures(StringMap<bool> & Features)1976 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1977 #endif
1978
1979 #if __APPLE__
1980 /// \returns the \p triple, but with the Host's arch spliced in.
withHostArch(Triple T)1981 static Triple withHostArch(Triple T) {
1982 #if defined(__arm__)
1983 T.setArch(Triple::arm);
1984 T.setArchName("arm");
1985 #elif defined(__arm64e__)
1986 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
1987 T.setArchName("arm64e");
1988 #elif defined(__aarch64__)
1989 T.setArch(Triple::aarch64);
1990 T.setArchName("arm64");
1991 #elif defined(__x86_64h__)
1992 T.setArch(Triple::x86_64);
1993 T.setArchName("x86_64h");
1994 #elif defined(__x86_64__)
1995 T.setArch(Triple::x86_64);
1996 T.setArchName("x86_64");
1997 #elif defined(__i386__)
1998 T.setArch(Triple::x86);
1999 T.setArchName("i386");
2000 #elif defined(__powerpc__)
2001 T.setArch(Triple::ppc);
2002 T.setArchName("powerpc");
2003 #else
2004 # error "Unimplemented host arch fixup"
2005 #endif
2006 return T;
2007 }
2008 #endif
2009
getProcessTriple()2010 std::string sys::getProcessTriple() {
2011 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
2012 Triple PT(Triple::normalize(TargetTripleString));
2013
2014 #if __APPLE__
2015 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
2016 /// the slices. This fixes that up.
2017 PT = withHostArch(PT);
2018 #endif
2019
2020 if (sizeof(void *) == 8 && PT.isArch32Bit())
2021 PT = PT.get64BitArchVariant();
2022 if (sizeof(void *) == 4 && PT.isArch64Bit())
2023 PT = PT.get32BitArchVariant();
2024
2025 return PT.str();
2026 }
2027
printDefaultTargetAndDetectedCPU(raw_ostream & OS)2028 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
2029 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
2030 std::string CPU = std::string(sys::getHostCPUName());
2031 if (CPU == "generic")
2032 CPU = "(unknown)";
2033 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n'
2034 << " Host CPU: " << CPU << '\n';
2035 #endif
2036 }
2037