1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53
54 #define DEBUG_TYPE "host-detection"
55
56 //===----------------------------------------------------------------------===//
57 //
58 // Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61
62 using namespace llvm;
63
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68 if (std::error_code EC = Text.getError()) {
69 llvm::errs() << "Can't read "
70 << "/proc/cpuinfo: " << EC.message() << "\n";
71 return nullptr;
72 }
73 return std::move(*Text);
74 }
75
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77 // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78 // and so we must use an operating-system interface to determine the current
79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80 const char *generic = "generic";
81
82 // The cpu line is second (after the 'processor: 0' line), so if this
83 // buffer is too small then something has changed (or is wrong).
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86
87 StringRef::const_iterator CIP = CPUInfoStart;
88
89 StringRef::const_iterator CPUStart = nullptr;
90 size_t CPULen = 0;
91
92 // We need to find the first line which starts with cpu, spaces, and a colon.
93 // After the colon, there may be some additional spaces and then the cpu type.
94 while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95 if (CIP < CPUInfoEnd && *CIP == '\n')
96 ++CIP;
97
98 if (CIP < CPUInfoEnd && *CIP == 'c') {
99 ++CIP;
100 if (CIP < CPUInfoEnd && *CIP == 'p') {
101 ++CIP;
102 if (CIP < CPUInfoEnd && *CIP == 'u') {
103 ++CIP;
104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105 ++CIP;
106
107 if (CIP < CPUInfoEnd && *CIP == ':') {
108 ++CIP;
109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110 ++CIP;
111
112 if (CIP < CPUInfoEnd) {
113 CPUStart = CIP;
114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115 *CIP != ',' && *CIP != '\n'))
116 ++CIP;
117 CPULen = CIP - CPUStart;
118 }
119 }
120 }
121 }
122 }
123
124 if (CPUStart == nullptr)
125 while (CIP < CPUInfoEnd && *CIP != '\n')
126 ++CIP;
127 }
128
129 if (CPUStart == nullptr)
130 return generic;
131
132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133 .Case("604e", "604e")
134 .Case("604", "604")
135 .Case("7400", "7400")
136 .Case("7410", "7400")
137 .Case("7447", "7400")
138 .Case("7455", "7450")
139 .Case("G4", "g4")
140 .Case("POWER4", "970")
141 .Case("PPC970FX", "970")
142 .Case("PPC970MP", "970")
143 .Case("G5", "g5")
144 .Case("POWER5", "g5")
145 .Case("A2", "a2")
146 .Case("POWER6", "pwr6")
147 .Case("POWER7", "pwr7")
148 .Case("POWER8", "pwr8")
149 .Case("POWER8E", "pwr8")
150 .Case("POWER8NVL", "pwr8")
151 .Case("POWER9", "pwr9")
152 .Case("POWER10", "pwr10")
153 // FIXME: If we get a simulator or machine with the capabilities of
154 // mcpu=future, we should revisit this and add the name reported by the
155 // simulator/machine.
156 .Default(generic);
157 }
158
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160 // The cpuid register on arm is not accessible from user space. On Linux,
161 // it is exposed through the /proc/cpuinfo file.
162
163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164 // in all cases.
165 SmallVector<StringRef, 32> Lines;
166 ProcCpuinfoContent.split(Lines, "\n");
167
168 // Look for the CPU implementer line.
169 StringRef Implementer;
170 StringRef Hardware;
171 StringRef Part;
172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173 if (Lines[I].startswith("CPU implementer"))
174 Implementer = Lines[I].substr(15).ltrim("\t :");
175 if (Lines[I].startswith("Hardware"))
176 Hardware = Lines[I].substr(8).ltrim("\t :");
177 if (Lines[I].startswith("CPU part"))
178 Part = Lines[I].substr(8).ltrim("\t :");
179 }
180
181 if (Implementer == "0x41") { // ARM Ltd.
182 // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185 return "cortex-a53";
186
187
188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189 // values correspond to the "Part number" in the CP15/c0 register. The
190 // contents are specified in the various processor manuals.
191 // This corresponds to the Main ID Register in Technical Reference Manuals.
192 // and is used in programs like sys-utils
193 return StringSwitch<const char *>(Part)
194 .Case("0x926", "arm926ej-s")
195 .Case("0xb02", "mpcore")
196 .Case("0xb36", "arm1136j-s")
197 .Case("0xb56", "arm1156t2-s")
198 .Case("0xb76", "arm1176jz-s")
199 .Case("0xc08", "cortex-a8")
200 .Case("0xc09", "cortex-a9")
201 .Case("0xc0f", "cortex-a15")
202 .Case("0xc20", "cortex-m0")
203 .Case("0xc23", "cortex-m3")
204 .Case("0xc24", "cortex-m4")
205 .Case("0xd22", "cortex-m55")
206 .Case("0xd02", "cortex-a34")
207 .Case("0xd04", "cortex-a35")
208 .Case("0xd03", "cortex-a53")
209 .Case("0xd05", "cortex-a55")
210 .Case("0xd46", "cortex-a510")
211 .Case("0xd07", "cortex-a57")
212 .Case("0xd08", "cortex-a72")
213 .Case("0xd09", "cortex-a73")
214 .Case("0xd0a", "cortex-a75")
215 .Case("0xd0b", "cortex-a76")
216 .Case("0xd0d", "cortex-a77")
217 .Case("0xd41", "cortex-a78")
218 .Case("0xd47", "cortex-a710")
219 .Case("0xd4d", "cortex-a715")
220 .Case("0xd44", "cortex-x1")
221 .Case("0xd4c", "cortex-x1c")
222 .Case("0xd48", "cortex-x2")
223 .Case("0xd4e", "cortex-x3")
224 .Case("0xd0c", "neoverse-n1")
225 .Case("0xd49", "neoverse-n2")
226 .Case("0xd40", "neoverse-v1")
227 .Case("0xd4f", "neoverse-v2")
228 .Default("generic");
229 }
230
231 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
232 return StringSwitch<const char *>(Part)
233 .Case("0x516", "thunderx2t99")
234 .Case("0x0516", "thunderx2t99")
235 .Case("0xaf", "thunderx2t99")
236 .Case("0x0af", "thunderx2t99")
237 .Case("0xa1", "thunderxt88")
238 .Case("0x0a1", "thunderxt88")
239 .Default("generic");
240 }
241
242 if (Implementer == "0x46") { // Fujitsu Ltd.
243 return StringSwitch<const char *>(Part)
244 .Case("0x001", "a64fx")
245 .Default("generic");
246 }
247
248 if (Implementer == "0x4e") { // NVIDIA Corporation
249 return StringSwitch<const char *>(Part)
250 .Case("0x004", "carmel")
251 .Default("generic");
252 }
253
254 if (Implementer == "0x48") // HiSilicon Technologies, Inc.
255 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
256 // values correspond to the "Part number" in the CP15/c0 register. The
257 // contents are specified in the various processor manuals.
258 return StringSwitch<const char *>(Part)
259 .Case("0xd01", "tsv110")
260 .Default("generic");
261
262 if (Implementer == "0x51") // Qualcomm Technologies, Inc.
263 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
264 // values correspond to the "Part number" in the CP15/c0 register. The
265 // contents are specified in the various processor manuals.
266 return StringSwitch<const char *>(Part)
267 .Case("0x06f", "krait") // APQ8064
268 .Case("0x201", "kryo")
269 .Case("0x205", "kryo")
270 .Case("0x211", "kryo")
271 .Case("0x800", "cortex-a73") // Kryo 2xx Gold
272 .Case("0x801", "cortex-a73") // Kryo 2xx Silver
273 .Case("0x802", "cortex-a75") // Kryo 3xx Gold
274 .Case("0x803", "cortex-a75") // Kryo 3xx Silver
275 .Case("0x804", "cortex-a76") // Kryo 4xx Gold
276 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
277 .Case("0xc00", "falkor")
278 .Case("0xc01", "saphira")
279 .Default("generic");
280 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
281 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
282 // any predictive pattern across variants and parts.
283 unsigned Variant = 0, Part = 0;
284
285 // Look for the CPU variant line, whose value is a 1 digit hexadecimal
286 // number, corresponding to the Variant bits in the CP15/C0 register.
287 for (auto I : Lines)
288 if (I.consume_front("CPU variant"))
289 I.ltrim("\t :").getAsInteger(0, Variant);
290
291 // Look for the CPU part line, whose value is a 3 digit hexadecimal
292 // number, corresponding to the PartNum bits in the CP15/C0 register.
293 for (auto I : Lines)
294 if (I.consume_front("CPU part"))
295 I.ltrim("\t :").getAsInteger(0, Part);
296
297 unsigned Exynos = (Variant << 12) | Part;
298 switch (Exynos) {
299 default:
300 // Default by falling through to Exynos M3.
301 [[fallthrough]];
302 case 0x1002:
303 return "exynos-m3";
304 case 0x1003:
305 return "exynos-m4";
306 }
307 }
308
309 if (Implementer == "0xc0") { // Ampere Computing
310 return StringSwitch<const char *>(Part)
311 .Case("0xac3", "ampere1")
312 .Case("0xac4", "ampere1a")
313 .Default("generic");
314 }
315
316 return "generic";
317 }
318
319 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)320 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
321 switch (Id) {
322 case 2064: // z900 not supported by LLVM
323 case 2066:
324 case 2084: // z990 not supported by LLVM
325 case 2086:
326 case 2094: // z9-109 not supported by LLVM
327 case 2096:
328 return "generic";
329 case 2097:
330 case 2098:
331 return "z10";
332 case 2817:
333 case 2818:
334 return "z196";
335 case 2827:
336 case 2828:
337 return "zEC12";
338 case 2964:
339 case 2965:
340 return HaveVectorSupport? "z13" : "zEC12";
341 case 3906:
342 case 3907:
343 return HaveVectorSupport? "z14" : "zEC12";
344 case 8561:
345 case 8562:
346 return HaveVectorSupport? "z15" : "zEC12";
347 case 3931:
348 case 3932:
349 default:
350 return HaveVectorSupport? "z16" : "zEC12";
351 }
352 }
353 } // end anonymous namespace
354
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)355 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
356 // STIDP is a privileged operation, so use /proc/cpuinfo instead.
357
358 // The "processor 0:" line comes after a fair amount of other information,
359 // including a cache breakdown, but this should be plenty.
360 SmallVector<StringRef, 32> Lines;
361 ProcCpuinfoContent.split(Lines, "\n");
362
363 // Look for the CPU features.
364 SmallVector<StringRef, 32> CPUFeatures;
365 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
366 if (Lines[I].startswith("features")) {
367 size_t Pos = Lines[I].find(':');
368 if (Pos != StringRef::npos) {
369 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
370 break;
371 }
372 }
373
374 // We need to check for the presence of vector support independently of
375 // the machine type, since we may only use the vector register set when
376 // supported by the kernel (and hypervisor).
377 bool HaveVectorSupport = false;
378 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
379 if (CPUFeatures[I] == "vx")
380 HaveVectorSupport = true;
381 }
382
383 // Now check the processor machine type.
384 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
385 if (Lines[I].startswith("processor ")) {
386 size_t Pos = Lines[I].find("machine = ");
387 if (Pos != StringRef::npos) {
388 Pos += sizeof("machine = ") - 1;
389 unsigned int Id;
390 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
391 return getCPUNameFromS390Model(Id, HaveVectorSupport);
392 }
393 break;
394 }
395 }
396
397 return "generic";
398 }
399
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)400 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
401 // There are 24 lines in /proc/cpuinfo
402 SmallVector<StringRef> Lines;
403 ProcCpuinfoContent.split(Lines, "\n");
404
405 // Look for uarch line to determine cpu name
406 StringRef UArch;
407 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
408 if (Lines[I].startswith("uarch")) {
409 UArch = Lines[I].substr(5).ltrim("\t :");
410 break;
411 }
412 }
413
414 return StringSwitch<const char *>(UArch)
415 .Case("sifive,u74-mc", "sifive-u74")
416 .Case("sifive,bullet0", "sifive-u74")
417 .Default("generic");
418 }
419
getHostCPUNameForBPF()420 StringRef sys::detail::getHostCPUNameForBPF() {
421 #if !defined(__linux__) || !defined(__x86_64__)
422 return "generic";
423 #else
424 uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
425 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
426 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
427 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
428 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
429 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
430 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
431 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
432 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433 /* BPF_EXIT_INSN() */
434 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
435
436 uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
437 /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439 /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443 /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445 /* BPF_EXIT_INSN() */
446 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
447
448 struct bpf_prog_load_attr {
449 uint32_t prog_type;
450 uint32_t insn_cnt;
451 uint64_t insns;
452 uint64_t license;
453 uint32_t log_level;
454 uint32_t log_size;
455 uint64_t log_buf;
456 uint32_t kern_version;
457 uint32_t prog_flags;
458 } attr = {};
459 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
460 attr.insn_cnt = 5;
461 attr.insns = (uint64_t)v3_insns;
462 attr.license = (uint64_t)"DUMMY";
463
464 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
465 sizeof(attr));
466 if (fd >= 0) {
467 close(fd);
468 return "v3";
469 }
470
471 /* Clear the whole attr in case its content changed by syscall. */
472 memset(&attr, 0, sizeof(attr));
473 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
474 attr.insn_cnt = 5;
475 attr.insns = (uint64_t)v2_insns;
476 attr.license = (uint64_t)"DUMMY";
477 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
478 if (fd >= 0) {
479 close(fd);
480 return "v2";
481 }
482 return "v1";
483 #endif
484 }
485
486 #if defined(__i386__) || defined(_M_IX86) || \
487 defined(__x86_64__) || defined(_M_X64)
488
489 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
490 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
491 // support. Consequently, for i386, the presence of CPUID is checked first
492 // via the corresponding eflags bit.
493 // Removal of cpuid.h header motivated by PR30384
494 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
495 // or test-suite, but are used in external projects e.g. libstdcxx
/// Reports whether the CPUID instruction may be executed on this host.
///
/// Only 32-bit x86 builds with a GNU-compatible compiler perform a real
/// check; every other configuration returns true unconditionally.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Read the flags register, flip bit 0x00200000, write the result back and
  // read the flags again. If the value read back equals the original, the bit
  // could not be changed and the result is 0; otherwise it is 1.
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  // Reached only when the compiler is neither GCC-compatible nor Clang.
  return true;
}
523
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
///
/// \param value the CPUID leaf, passed to the instruction in EAX.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are left
/// untouched when the function returns true.
/// \returns false on success, true when no way of executing cpuid is compiled
/// in for this compiler/architecture combination.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction and swapped back afterwards,
  // which is why the EBX result is read from the "S" (rsi) operand.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/restore scheme as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
561
562 namespace llvm {
563 namespace sys {
564 namespace detail {
565 namespace x86 {
566
getVendorSignature(unsigned * MaxLeaf)567 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
568 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
569 if (MaxLeaf == nullptr)
570 MaxLeaf = &EAX;
571 else
572 *MaxLeaf = 0;
573
574 if (!isCpuIdSupported())
575 return VendorSignatures::UNKNOWN;
576
577 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
578 return VendorSignatures::UNKNOWN;
579
580 // "Genu ineI ntel"
581 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
582 return VendorSignatures::GENUINE_INTEL;
583
584 // "Auth enti cAMD"
585 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
586 return VendorSignatures::AUTHENTIC_AMD;
587
588 return VendorSignatures::UNKNOWN;
589 }
590
591 } // namespace x86
592 } // namespace detail
593 } // namespace sys
594 } // namespace llvm
595
596 using namespace llvm::sys::detail::x86;
597
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
///
/// \param value the CPUID leaf, passed to the instruction in EAX.
/// \param subleaf the sub-leaf index, passed to the instruction in ECX.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are left
/// untouched when the function returns true.
/// \returns false on success, true when no way of executing cpuid is compiled
/// in for this compiler/architecture combination.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction and swapped back afterwards,
  // which is why the EBX result is read from the "S" (rsi) operand.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/restore scheme as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC: use the sub-leaf aware intrinsic and unpack its result array.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
636
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// On success the low half of the register is stored in *rEAX and the high
// half in *rEDX, and false is returned. Returns true (leaving the outputs
// untouched) when no implementation is compiled in for this compiler.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX is loaded with 0 as the instruction's input.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // The intrinsic returns the full 64-bit value; split it into two halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
654
/// Decodes the family and model numbers from a CPUID signature word.
///
/// \param EAX the signature value to decode.
/// \param Family receives the family: bits 8-11, plus the extended family
/// (bits 20-27) when the base family is 0xF.
/// \param Model receives the model: bits 4-7, plus the extended model
/// (bits 16-19) shifted into the upper nibble when the base family is 6 or
/// 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family only applies to family F; the extended model applies
  // to families 6 and F.
  const bool HasExtFamily = BaseFamily == 0xf;
  const bool HasExtModel = BaseFamily == 6 || HasExtFamily;

  *Family = HasExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = HasExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
667
/// Maps an Intel family/model pair (plus feature bits) to an LLVM CPU name.
///
/// \param Family the CPU family number.
/// \param Model the CPU model number; only consulted for family 6.
/// \param Features feature bit-set stored as an array of 32-bit words,
/// indexed by the X86::FEATURE_* values.
/// \param Type written with an X86::INTEL_* processor type for the family 6
/// models listed explicitly below; left untouched otherwise.
/// \param Subtype written with an X86::INTEL_COREI7_* subtype for the models
/// that have one; left untouched otherwise.
/// \returns the CPU name, or an empty StringRef for a family that is not
/// handled here.
static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
  // Tests bit F of the feature bit-set.
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 3:
    CPU = "i386";
    break;
  case 4:
    CPU = "i486";
    break;
  case 5:
    if (testFeature(X86::FEATURE_MMX)) {
      CPU = "pentium-mmx";
      break;
    }
    CPU = "pentium";
    break;
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e: // Nehalem EX
      CPU = "nehalem";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e: // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e: // Skylake mobile
    case 0x5e: // Skylake desktop
    case 0x8e: // Kaby Lake mobile
    case 0x9e: // Kaby Lake desktop
    case 0xa5: // Comet Lake-H/S
    case 0xa6: // Comet Lake-U
      CPU = "skylake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    // One model number covers three CPU names here; they are told apart by
    // the AVX512BF16 and AVX512VNNI feature bits.
    case 0x55:
      *Type = X86::INTEL_COREI7;
      if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
    // Raptorlake:
    case 0xb7:
    // Meteorlake:
    case 0xaa:
    case 0xac:
      // Raptorlake and Meteorlake models are reported as "alderlake" here.
      CPU = "alderlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
      break;

    // Graniterapids:
    case 0xae:
    case 0xad:
      CPU = "graniterapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Emerald Rapids:
    case 0xcf:
    // Sapphire Rapids:
    case 0x8f:
      // The Emerald Rapids model is reported as "sapphirerapids" here.
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Sierraforest:
    case 0xaf:
      CPU = "sierraforest";
      *Type = X86::INTEL_SIERRAFOREST;
      break;

    // Grandridge:
    case 0xb6:
      CPU = "grandridge";
      *Type = X86::INTEL_GRANDRIDGE;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // The chain below tests feature bits in a fixed order and stops at the
      // first one that is set, so the order of the branches matters.
      // TODO detect tigerlake host from model
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_AVX512ER)) {
        CPU = "knl";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    // Family 15 is classified purely by feature bits.
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  default:
    break; // Unknown.
  }

  return CPU;
}
991
992 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)993 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
994 const unsigned *Features,
995 unsigned *Type, unsigned *Subtype) {
996 auto testFeature = [&](unsigned F) {
997 return (Features[F / 32] & (1U << (F % 32))) != 0;
998 };
999
1000 StringRef CPU;
1001
1002 switch (Family) {
1003 case 4:
1004 CPU = "i486";
1005 break;
1006 case 5:
1007 CPU = "pentium";
1008 switch (Model) {
1009 case 6:
1010 case 7:
1011 CPU = "k6";
1012 break;
1013 case 8:
1014 CPU = "k6-2";
1015 break;
1016 case 9:
1017 case 13:
1018 CPU = "k6-3";
1019 break;
1020 case 10:
1021 CPU = "geode";
1022 break;
1023 }
1024 break;
1025 case 6:
1026 if (testFeature(X86::FEATURE_SSE)) {
1027 CPU = "athlon-xp";
1028 break;
1029 }
1030 CPU = "athlon";
1031 break;
1032 case 15:
1033 if (testFeature(X86::FEATURE_SSE3)) {
1034 CPU = "k8-sse3";
1035 break;
1036 }
1037 CPU = "k8";
1038 break;
1039 case 16:
1040 CPU = "amdfam10";
1041 *Type = X86::AMDFAM10H; // "amdfam10"
1042 switch (Model) {
1043 case 2:
1044 *Subtype = X86::AMDFAM10H_BARCELONA;
1045 break;
1046 case 4:
1047 *Subtype = X86::AMDFAM10H_SHANGHAI;
1048 break;
1049 case 8:
1050 *Subtype = X86::AMDFAM10H_ISTANBUL;
1051 break;
1052 }
1053 break;
1054 case 20:
1055 CPU = "btver1";
1056 *Type = X86::AMD_BTVER1;
1057 break;
1058 case 21:
1059 CPU = "bdver1";
1060 *Type = X86::AMDFAM15H;
1061 if (Model >= 0x60 && Model <= 0x7f) {
1062 CPU = "bdver4";
1063 *Subtype = X86::AMDFAM15H_BDVER4;
1064 break; // 60h-7Fh: Excavator
1065 }
1066 if (Model >= 0x30 && Model <= 0x3f) {
1067 CPU = "bdver3";
1068 *Subtype = X86::AMDFAM15H_BDVER3;
1069 break; // 30h-3Fh: Steamroller
1070 }
1071 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1072 CPU = "bdver2";
1073 *Subtype = X86::AMDFAM15H_BDVER2;
1074 break; // 02h, 10h-1Fh: Piledriver
1075 }
1076 if (Model <= 0x0f) {
1077 *Subtype = X86::AMDFAM15H_BDVER1;
1078 break; // 00h-0Fh: Bulldozer
1079 }
1080 break;
1081 case 22:
1082 CPU = "btver2";
1083 *Type = X86::AMD_BTVER2;
1084 break;
1085 case 23:
1086 CPU = "znver1";
1087 *Type = X86::AMDFAM17H;
1088 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1089 CPU = "znver2";
1090 *Subtype = X86::AMDFAM17H_ZNVER2;
1091 break; // 30h-3fh, 71h: Zen2
1092 }
1093 if (Model <= 0x0f) {
1094 *Subtype = X86::AMDFAM17H_ZNVER1;
1095 break; // 00h-0Fh: Zen1
1096 }
1097 break;
1098 case 25:
1099 CPU = "znver3";
1100 *Type = X86::AMDFAM19H;
1101 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
1102 // Family 19h Models 00h-0Fh - Zen3
1103 // Family 19h Models 20h-2Fh - Zen3
1104 // Family 19h Models 30h-3Fh - Zen3
1105 // Family 19h Models 40h-4Fh - Zen3+
1106 // Family 19h Models 50h-5Fh - Zen3+
1107 *Subtype = X86::AMDFAM19H_ZNVER3;
1108 break;
1109 }
1110 if ((Model >= 0x10 && Model <= 0x1f) ||
1111 (Model >= 0x60 && Model <= 0x74) ||
1112 (Model >= 0x78 && Model <= 0x7b) ||
1113 (Model >= 0xA0 && Model <= 0xAf)) {
1114 CPU = "znver4";
1115 *Subtype = X86::AMDFAM19H_ZNVER4;
1116 break; // "znver4"
1117 }
1118 break; // family 19h
1119 default:
1120 break; // Unknown AMD CPU.
1121 }
1122
1123 return CPU;
1124 }
1125
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1126 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1127 unsigned *Features) {
1128 unsigned EAX, EBX;
1129
1130 auto setFeature = [&](unsigned F) {
1131 Features[F / 32] |= 1U << (F % 32);
1132 };
1133
1134 if ((EDX >> 15) & 1)
1135 setFeature(X86::FEATURE_CMOV);
1136 if ((EDX >> 23) & 1)
1137 setFeature(X86::FEATURE_MMX);
1138 if ((EDX >> 25) & 1)
1139 setFeature(X86::FEATURE_SSE);
1140 if ((EDX >> 26) & 1)
1141 setFeature(X86::FEATURE_SSE2);
1142
1143 if ((ECX >> 0) & 1)
1144 setFeature(X86::FEATURE_SSE3);
1145 if ((ECX >> 1) & 1)
1146 setFeature(X86::FEATURE_PCLMUL);
1147 if ((ECX >> 9) & 1)
1148 setFeature(X86::FEATURE_SSSE3);
1149 if ((ECX >> 12) & 1)
1150 setFeature(X86::FEATURE_FMA);
1151 if ((ECX >> 19) & 1)
1152 setFeature(X86::FEATURE_SSE4_1);
1153 if ((ECX >> 20) & 1) {
1154 setFeature(X86::FEATURE_SSE4_2);
1155 setFeature(X86::FEATURE_CRC32);
1156 }
1157 if ((ECX >> 23) & 1)
1158 setFeature(X86::FEATURE_POPCNT);
1159 if ((ECX >> 25) & 1)
1160 setFeature(X86::FEATURE_AES);
1161
1162 if ((ECX >> 22) & 1)
1163 setFeature(X86::FEATURE_MOVBE);
1164
1165 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1166 // indicates that the AVX registers will be saved and restored on context
1167 // switch, then we have full AVX support.
1168 const unsigned AVXBits = (1 << 27) | (1 << 28);
1169 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1170 ((EAX & 0x6) == 0x6);
1171 #if defined(__APPLE__)
1172 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1173 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1174 // set right now.
1175 bool HasAVX512Save = true;
1176 #else
1177 // AVX512 requires additional context to be saved by the OS.
1178 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1179 #endif
1180
1181 if (HasAVX)
1182 setFeature(X86::FEATURE_AVX);
1183
1184 bool HasLeaf7 =
1185 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1186
1187 if (HasLeaf7 && ((EBX >> 3) & 1))
1188 setFeature(X86::FEATURE_BMI);
1189 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1190 setFeature(X86::FEATURE_AVX2);
1191 if (HasLeaf7 && ((EBX >> 8) & 1))
1192 setFeature(X86::FEATURE_BMI2);
1193 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1194 setFeature(X86::FEATURE_AVX512F);
1195 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1196 setFeature(X86::FEATURE_AVX512DQ);
1197 if (HasLeaf7 && ((EBX >> 19) & 1))
1198 setFeature(X86::FEATURE_ADX);
1199 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1200 setFeature(X86::FEATURE_AVX512IFMA);
1201 if (HasLeaf7 && ((EBX >> 23) & 1))
1202 setFeature(X86::FEATURE_CLFLUSHOPT);
1203 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1204 setFeature(X86::FEATURE_AVX512PF);
1205 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1206 setFeature(X86::FEATURE_AVX512ER);
1207 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1208 setFeature(X86::FEATURE_AVX512CD);
1209 if (HasLeaf7 && ((EBX >> 29) & 1))
1210 setFeature(X86::FEATURE_SHA);
1211 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1212 setFeature(X86::FEATURE_AVX512BW);
1213 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1214 setFeature(X86::FEATURE_AVX512VL);
1215
1216 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1217 setFeature(X86::FEATURE_AVX512VBMI);
1218 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1219 setFeature(X86::FEATURE_AVX512VBMI2);
1220 if (HasLeaf7 && ((ECX >> 8) & 1))
1221 setFeature(X86::FEATURE_GFNI);
1222 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1223 setFeature(X86::FEATURE_VPCLMULQDQ);
1224 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1225 setFeature(X86::FEATURE_AVX512VNNI);
1226 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1227 setFeature(X86::FEATURE_AVX512BITALG);
1228 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1229 setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1230
1231 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1232 setFeature(X86::FEATURE_AVX5124VNNIW);
1233 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1234 setFeature(X86::FEATURE_AVX5124FMAPS);
1235 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1236 setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1237
1238 bool HasLeaf7Subleaf1 =
1239 MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1240 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1241 setFeature(X86::FEATURE_AVX512BF16);
1242
1243 unsigned MaxExtLevel;
1244 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1245
1246 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1247 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1248 if (HasExtLeaf1 && ((ECX >> 6) & 1))
1249 setFeature(X86::FEATURE_SSE4_A);
1250 if (HasExtLeaf1 && ((ECX >> 11) & 1))
1251 setFeature(X86::FEATURE_XOP);
1252 if (HasExtLeaf1 && ((ECX >> 16) & 1))
1253 setFeature(X86::FEATURE_FMA4);
1254
1255 if (HasExtLeaf1 && ((EDX >> 29) & 1))
1256 setFeature(X86::FEATURE_64BIT);
1257 }
1258
getHostCPUName()1259 StringRef sys::getHostCPUName() {
1260 unsigned MaxLeaf = 0;
1261 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1262 if (Vendor == VendorSignatures::UNKNOWN)
1263 return "generic";
1264
1265 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1266 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1267
1268 unsigned Family = 0, Model = 0;
1269 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1270 detectX86FamilyModel(EAX, &Family, &Model);
1271 getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1272
1273 // These aren't consumed in this file, but we try to keep some source code the
1274 // same or similar to compiler-rt.
1275 unsigned Type = 0;
1276 unsigned Subtype = 0;
1277
1278 StringRef CPU;
1279
1280 if (Vendor == VendorSignatures::GENUINE_INTEL) {
1281 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1282 &Subtype);
1283 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1284 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1285 &Subtype);
1286 }
1287
1288 if (!CPU.empty())
1289 return CPU;
1290
1291 return "generic";
1292 }
1293
1294 #elif defined(__APPLE__) && defined(__powerpc__)
getHostCPUName()1295 StringRef sys::getHostCPUName() {
1296 host_basic_info_data_t hostInfo;
1297 mach_msg_type_number_t infoCount;
1298
1299 infoCount = HOST_BASIC_INFO_COUNT;
1300 mach_port_t hostPort = mach_host_self();
1301 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1302 &infoCount);
1303 mach_port_deallocate(mach_task_self(), hostPort);
1304
1305 if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1306 return "generic";
1307
1308 switch (hostInfo.cpu_subtype) {
1309 case CPU_SUBTYPE_POWERPC_601:
1310 return "601";
1311 case CPU_SUBTYPE_POWERPC_602:
1312 return "602";
1313 case CPU_SUBTYPE_POWERPC_603:
1314 return "603";
1315 case CPU_SUBTYPE_POWERPC_603e:
1316 return "603e";
1317 case CPU_SUBTYPE_POWERPC_603ev:
1318 return "603ev";
1319 case CPU_SUBTYPE_POWERPC_604:
1320 return "604";
1321 case CPU_SUBTYPE_POWERPC_604e:
1322 return "604e";
1323 case CPU_SUBTYPE_POWERPC_620:
1324 return "620";
1325 case CPU_SUBTYPE_POWERPC_750:
1326 return "750";
1327 case CPU_SUBTYPE_POWERPC_7400:
1328 return "7400";
1329 case CPU_SUBTYPE_POWERPC_7450:
1330 return "7450";
1331 case CPU_SUBTYPE_POWERPC_970:
1332 return "970";
1333 default:;
1334 }
1335
1336 return "generic";
1337 }
1338 #elif defined(__linux__) && defined(__powerpc__)
getHostCPUName()1339 StringRef sys::getHostCPUName() {
1340 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1341 StringRef Content = P ? P->getBuffer() : "";
1342 return detail::getHostCPUNameForPowerPC(Content);
1343 }
1344 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1345 StringRef sys::getHostCPUName() {
1346 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1347 StringRef Content = P ? P->getBuffer() : "";
1348 return detail::getHostCPUNameForARM(Content);
1349 }
1350 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1351 StringRef sys::getHostCPUName() {
1352 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1353 StringRef Content = P ? P->getBuffer() : "";
1354 return detail::getHostCPUNameForS390x(Content);
1355 }
1356 #elif defined(__MVS__)
getHostCPUName()1357 StringRef sys::getHostCPUName() {
1358 // Get pointer to Communications Vector Table (CVT).
1359 // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1360 // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1361 int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1362 // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1363 // of address.
1364 int ReadValue = *StartToCVTOffset;
1365 // Explicitly clear the high order bit.
1366 ReadValue = (ReadValue & 0x7FFFFFFF);
1367 char *CVT = reinterpret_cast<char *>(ReadValue);
1368 // The model number is located in the CVT prefix at offset -6 and stored as
1369 // signless packed decimal.
1370 uint16_t Id = *(uint16_t *)&CVT[-6];
1371 // Convert number to integer.
1372 Id = decodePackedBCD<uint16_t>(Id, false);
1373 // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1374 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1375 // extension can only be used if bit CVTVEF is on.
1376 bool HaveVectorSupport = CVT[244] & 0x80;
1377 return getCPUNameFromS390Model(Id, HaveVectorSupport);
1378 }
1379 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1380 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1381 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1382 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1383 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1384 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1385 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1386 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1387 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1388 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1389
getHostCPUName()1390 StringRef sys::getHostCPUName() {
1391 uint32_t Family;
1392 size_t Length = sizeof(Family);
1393 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1394
1395 switch (Family) {
1396 case CPUFAMILY_ARM_SWIFT:
1397 return "swift";
1398 case CPUFAMILY_ARM_CYCLONE:
1399 return "apple-a7";
1400 case CPUFAMILY_ARM_TYPHOON:
1401 return "apple-a8";
1402 case CPUFAMILY_ARM_TWISTER:
1403 return "apple-a9";
1404 case CPUFAMILY_ARM_HURRICANE:
1405 return "apple-a10";
1406 case CPUFAMILY_ARM_MONSOON_MISTRAL:
1407 return "apple-a11";
1408 case CPUFAMILY_ARM_VORTEX_TEMPEST:
1409 return "apple-a12";
1410 case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1411 return "apple-a13";
1412 case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1413 return "apple-m1";
1414 default:
1415 // Default to the newest CPU we know about.
1416 return "apple-m1";
1417 }
1418 }
1419 #elif defined(_AIX)
getHostCPUName()1420 StringRef sys::getHostCPUName() {
1421 switch (_system_configuration.implementation) {
1422 case POWER_4:
1423 if (_system_configuration.version == PV_4_3)
1424 return "970";
1425 return "pwr4";
1426 case POWER_5:
1427 if (_system_configuration.version == PV_5)
1428 return "pwr5";
1429 return "pwr5x";
1430 case POWER_6:
1431 if (_system_configuration.version == PV_6_Compat)
1432 return "pwr6";
1433 return "pwr6x";
1434 case POWER_7:
1435 return "pwr7";
1436 case POWER_8:
1437 return "pwr8";
1438 case POWER_9:
1439 return "pwr9";
1440 // TODO: simplify this once the macro is available in all OS levels.
1441 #ifdef POWER_10
1442 case POWER_10:
1443 #else
1444 case 0x40000:
1445 #endif
1446 return "pwr10";
1447 default:
1448 return "generic";
1449 }
1450 }
1451 #elif defined(__riscv)
getHostCPUName()1452 StringRef sys::getHostCPUName() {
1453 #if defined(__linux__)
1454 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1455 StringRef Content = P ? P->getBuffer() : "";
1456 return detail::getHostCPUNameForRISCV(Content);
1457 #else
1458 #if __riscv_xlen == 64
1459 return "generic-rv64";
1460 #elif __riscv_xlen == 32
1461 return "generic-rv32";
1462 #else
1463 #error "Unhandled value of __riscv_xlen"
1464 #endif
1465 #endif
1466 }
1467 #elif defined(__sparc__)
1468 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1469 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1470 SmallVector<StringRef> Lines;
1471 ProcCpuinfoContent.split(Lines, "\n");
1472
1473 // Look for cpu line to determine cpu name
1474 StringRef Cpu;
1475 for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1476 if (Lines[I].startswith("cpu")) {
1477 Cpu = Lines[I].substr(5).ltrim("\t :");
1478 break;
1479 }
1480 }
1481
1482 return StringSwitch<const char *>(Cpu)
1483 .StartsWith("SuperSparc", "supersparc")
1484 .StartsWith("HyperSparc", "hypersparc")
1485 .StartsWith("SpitFire", "ultrasparc")
1486 .StartsWith("BlackBird", "ultrasparc")
1487 .StartsWith("Sabre", " ultrasparc")
1488 .StartsWith("Hummingbird", "ultrasparc")
1489 .StartsWith("Cheetah", "ultrasparc3")
1490 .StartsWith("Jalapeno", "ultrasparc3")
1491 .StartsWith("Jaguar", "ultrasparc3")
1492 .StartsWith("Panther", "ultrasparc3")
1493 .StartsWith("Serrano", "ultrasparc3")
1494 .StartsWith("UltraSparc T1", "niagara")
1495 .StartsWith("UltraSparc T2", "niagara2")
1496 .StartsWith("UltraSparc T3", "niagara3")
1497 .StartsWith("UltraSparc T4", "niagara4")
1498 .StartsWith("UltraSparc T5", "niagara4")
1499 .StartsWith("LEON", "leon3")
1500 // niagara7/m8 not supported by LLVM yet.
1501 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1502 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1503 .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1504 .Default("generic");
1505 }
1506 #endif
1507
getHostCPUName()1508 StringRef sys::getHostCPUName() {
1509 #if defined(__linux__)
1510 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1511 StringRef Content = P ? P->getBuffer() : "";
1512 return detail::getHostCPUNameForSPARC(Content);
1513 #elif defined(__sun__) && defined(__svr4__)
1514 char *buf = NULL;
1515 kstat_ctl_t *kc;
1516 kstat_t *ksp;
1517 kstat_named_t *brand = NULL;
1518
1519 kc = kstat_open();
1520 if (kc != NULL) {
1521 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1522 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1523 ksp->ks_type == KSTAT_TYPE_NAMED)
1524 brand =
1525 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1526 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1527 buf = KSTAT_NAMED_STR_PTR(brand);
1528 }
1529 kstat_close(kc);
1530
1531 return StringSwitch<const char *>(buf)
1532 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1533 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1534 .Case("TMS390Z55",
1535 "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1536 .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1537 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1538 .Case("RT623", "hypersparc") // Ross hyperSPARC
1539 .Case("RT625", "hypersparc")
1540 .Case("RT626", "hypersparc")
1541 .Case("UltraSPARC-I", "ultrasparc")
1542 .Case("UltraSPARC-II", "ultrasparc")
1543 .Case("UltraSPARC-IIe", "ultrasparc")
1544 .Case("UltraSPARC-IIi", "ultrasparc")
1545 .Case("SPARC64-III", "ultrasparc")
1546 .Case("SPARC64-IV", "ultrasparc")
1547 .Case("UltraSPARC-III", "ultrasparc3")
1548 .Case("UltraSPARC-III+", "ultrasparc3")
1549 .Case("UltraSPARC-IIIi", "ultrasparc3")
1550 .Case("UltraSPARC-IIIi+", "ultrasparc3")
1551 .Case("UltraSPARC-IV", "ultrasparc3")
1552 .Case("UltraSPARC-IV+", "ultrasparc3")
1553 .Case("SPARC64-V", "ultrasparc3")
1554 .Case("SPARC64-VI", "ultrasparc3")
1555 .Case("SPARC64-VII", "ultrasparc3")
1556 .Case("UltraSPARC-T1", "niagara")
1557 .Case("UltraSPARC-T2", "niagara2")
1558 .Case("UltraSPARC-T2", "niagara2")
1559 .Case("UltraSPARC-T2+", "niagara2")
1560 .Case("SPARC-T3", "niagara3")
1561 .Case("SPARC-T4", "niagara4")
1562 .Case("SPARC-T5", "niagara4")
1563 // niagara7/m8 not supported by LLVM yet.
1564 .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1565 .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1566 .Case("SPARC-M8", "niagara4" /* "m8" */)
1567 .Default("generic");
1568 #else
1569 return "generic";
1570 #endif
1571 }
1572 #else
getHostCPUName()1573 StringRef sys::getHostCPUName() { return "generic"; }
1574 namespace llvm {
1575 namespace sys {
1576 namespace detail {
1577 namespace x86 {
1578
getVendorSignature(unsigned * MaxLeaf)1579 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1580 return VendorSignatures::UNKNOWN;
1581 }
1582
1583 } // namespace x86
1584 } // namespace detail
1585 } // namespace sys
1586 } // namespace llvm
1587 #endif
1588
1589 #if defined(__i386__) || defined(_M_IX86) || \
1590 defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1591 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1592 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1593 unsigned MaxLevel;
1594
1595 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1596 return false;
1597
1598 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1599
1600 Features["cx8"] = (EDX >> 8) & 1;
1601 Features["cmov"] = (EDX >> 15) & 1;
1602 Features["mmx"] = (EDX >> 23) & 1;
1603 Features["fxsr"] = (EDX >> 24) & 1;
1604 Features["sse"] = (EDX >> 25) & 1;
1605 Features["sse2"] = (EDX >> 26) & 1;
1606
1607 Features["sse3"] = (ECX >> 0) & 1;
1608 Features["pclmul"] = (ECX >> 1) & 1;
1609 Features["ssse3"] = (ECX >> 9) & 1;
1610 Features["cx16"] = (ECX >> 13) & 1;
1611 Features["sse4.1"] = (ECX >> 19) & 1;
1612 Features["sse4.2"] = (ECX >> 20) & 1;
1613 Features["crc32"] = Features["sse4.2"];
1614 Features["movbe"] = (ECX >> 22) & 1;
1615 Features["popcnt"] = (ECX >> 23) & 1;
1616 Features["aes"] = (ECX >> 25) & 1;
1617 Features["rdrnd"] = (ECX >> 30) & 1;
1618
1619 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1620 // indicates that the AVX registers will be saved and restored on context
1621 // switch, then we have full AVX support.
1622 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1623 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1624 #if defined(__APPLE__)
1625 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1626 // save the AVX512 context if we use AVX512 instructions, even the bit is not
1627 // set right now.
1628 bool HasAVX512Save = true;
1629 #else
1630 // AVX512 requires additional context to be saved by the OS.
1631 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1632 #endif
1633 // AMX requires additional context to be saved by the OS.
1634 const unsigned AMXBits = (1 << 17) | (1 << 18);
1635 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1636
1637 Features["avx"] = HasAVXSave;
1638 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
1639 // Only enable XSAVE if OS has enabled support for saving YMM state.
1640 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1641 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;
1642
1643 unsigned MaxExtLevel;
1644 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1645
1646 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1647 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1648 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
1649 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
1650 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
1651 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
1652 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1653 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
1654 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1655 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
1656 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1657
1658 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
1659
1660 // Miscellaneous memory related features, detected by
1661 // using the 0x80000008 leaf of the CPUID instruction
1662 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1663 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1664 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
1665 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
1666 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1667
1668 bool HasLeaf7 =
1669 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1670
1671 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
1672 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
1673 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
1674 // AVX2 is only supported if we have the OS save support from AVX.
1675 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
1676 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
1677 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
1678 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
1679 // AVX512 is only supported if the OS supports the context save for it.
1680 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1681 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1682 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
1683 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
1684 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1685 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1686 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
1687 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1688 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1689 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1690 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
1691 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1692 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1693
1694 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
1695 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
1696 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
1697 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
1698 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
1699 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
1700 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
1701 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
1702 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1703 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1704 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1705 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1706 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
1707 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1708 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
1709 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
1710 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
1711 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
1712
1713 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
1714 Features["avx512vp2intersect"] =
1715 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1716 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
1717 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
1718 // There are two CPUID leafs which information associated with the pconfig
1719 // instruction:
1720 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1721 // bit of EDX), while the EAX=0x1b leaf returns information on the
1722 // availability of specific pconfig leafs.
1723 // The target feature here only refers to the the first of these two.
1724 // Users might need to check for the availability of specific pconfig
1725 // leaves using cpuid, since that information is ignored while
1726 // detecting features using the "-march=native" flag.
1727 // For more info, see X86 ISA docs.
1728 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1729 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1730 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1731 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1732 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1733 bool HasLeaf7Subleaf1 =
1734 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1735 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1736 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1737 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1738 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1739 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1740 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1741 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1742 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1743 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1744 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1745
1746 bool HasLeafD = MaxLevel >= 0xd &&
1747 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1748
1749 // Only enable XSAVE if OS has enabled support for saving YMM state.
1750 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1751 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1752 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1753
1754 bool HasLeaf14 = MaxLevel >= 0x14 &&
1755 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1756
1757 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1758
1759 bool HasLeaf19 =
1760 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1761 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1762
1763 return true;
1764 }
1765 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1766 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1767 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1768 if (!P)
1769 return false;
1770
1771 SmallVector<StringRef, 32> Lines;
1772 P->getBuffer().split(Lines, "\n");
1773
1774 SmallVector<StringRef, 32> CPUFeatures;
1775
1776 // Look for the CPU features.
1777 for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1778 if (Lines[I].startswith("Features")) {
1779 Lines[I].split(CPUFeatures, ' ');
1780 break;
1781 }
1782
1783 #if defined(__aarch64__)
1784 // Keep track of which crypto features we have seen
1785 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1786 uint32_t crypto = 0;
1787 #endif
1788
1789 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1790 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1791 #if defined(__aarch64__)
1792 .Case("asimd", "neon")
1793 .Case("fp", "fp-armv8")
1794 .Case("crc32", "crc")
1795 .Case("atomics", "lse")
1796 .Case("sve", "sve")
1797 .Case("sve2", "sve2")
1798 #else
1799 .Case("half", "fp16")
1800 .Case("neon", "neon")
1801 .Case("vfpv3", "vfp3")
1802 .Case("vfpv3d16", "vfp3d16")
1803 .Case("vfpv4", "vfp4")
1804 .Case("idiva", "hwdiv-arm")
1805 .Case("idivt", "hwdiv")
1806 #endif
1807 .Default("");
1808
1809 #if defined(__aarch64__)
1810 // We need to check crypto separately since we need all of the crypto
1811 // extensions to enable the subtarget feature
1812 if (CPUFeatures[I] == "aes")
1813 crypto |= CAP_AES;
1814 else if (CPUFeatures[I] == "pmull")
1815 crypto |= CAP_PMULL;
1816 else if (CPUFeatures[I] == "sha1")
1817 crypto |= CAP_SHA1;
1818 else if (CPUFeatures[I] == "sha2")
1819 crypto |= CAP_SHA2;
1820 #endif
1821
1822 if (LLVMFeatureStr != "")
1823 Features[LLVMFeatureStr] = true;
1824 }
1825
1826 #if defined(__aarch64__)
1827 // If we have all crypto bits we can add the feature
1828 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1829 Features["crypto"] = true;
1830 #endif
1831
1832 return true;
1833 }
1834 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1835 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1836 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1837 Features["neon"] = true;
1838 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1839 Features["crc"] = true;
1840 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1841 Features["crypto"] = true;
1842
1843 return true;
1844 }
1845 #else
getHostCPUFeatures(StringMap<bool> & Features)1846 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1847 #endif
1848
getProcessTriple()1849 std::string sys::getProcessTriple() {
1850 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1851 Triple PT(Triple::normalize(TargetTripleString));
1852
1853 if (sizeof(void *) == 8 && PT.isArch32Bit())
1854 PT = PT.get64BitArchVariant();
1855 if (sizeof(void *) == 4 && PT.isArch64Bit())
1856 PT = PT.get32BitArchVariant();
1857
1858 return PT.str();
1859 }
1860
printDefaultTargetAndDetectedCPU(raw_ostream & OS)1861 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
1862 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
1863 std::string CPU = std::string(sys::getHostCPUName());
1864 if (CPU == "generic")
1865 CPU = "(unknown)";
1866 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n'
1867 << " Host CPU: " << CPU << '\n';
1868 #endif
1869 }
1870