xref: /openbsd/gnu/llvm/llvm/lib/TargetParser/Host.cpp (revision d415bd75)
1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53 
54 #define DEBUG_TYPE "host-detection"
55 
56 //===----------------------------------------------------------------------===//
57 //
58 //  Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61 
62 using namespace llvm;
63 
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68   if (std::error_code EC = Text.getError()) {
69     llvm::errs() << "Can't read "
70                  << "/proc/cpuinfo: " << EC.message() << "\n";
71     return nullptr;
72   }
73   return std::move(*Text);
74 }
75 
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78   // and so we must use an operating-system interface to determine the current
79   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80   const char *generic = "generic";
81 
82   // The cpu line is second (after the 'processor: 0' line), so if this
83   // buffer is too small then something has changed (or is wrong).
84   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86 
87   StringRef::const_iterator CIP = CPUInfoStart;
88 
89   StringRef::const_iterator CPUStart = nullptr;
90   size_t CPULen = 0;
91 
92   // We need to find the first line which starts with cpu, spaces, and a colon.
93   // After the colon, there may be some additional spaces and then the cpu type.
94   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95     if (CIP < CPUInfoEnd && *CIP == '\n')
96       ++CIP;
97 
98     if (CIP < CPUInfoEnd && *CIP == 'c') {
99       ++CIP;
100       if (CIP < CPUInfoEnd && *CIP == 'p') {
101         ++CIP;
102         if (CIP < CPUInfoEnd && *CIP == 'u') {
103           ++CIP;
104           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105             ++CIP;
106 
107           if (CIP < CPUInfoEnd && *CIP == ':') {
108             ++CIP;
109             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110               ++CIP;
111 
112             if (CIP < CPUInfoEnd) {
113               CPUStart = CIP;
114               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                           *CIP != ',' && *CIP != '\n'))
116                 ++CIP;
117               CPULen = CIP - CPUStart;
118             }
119           }
120         }
121       }
122     }
123 
124     if (CPUStart == nullptr)
125       while (CIP < CPUInfoEnd && *CIP != '\n')
126         ++CIP;
127   }
128 
129   if (CPUStart == nullptr)
130     return generic;
131 
132   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133       .Case("604e", "604e")
134       .Case("604", "604")
135       .Case("7400", "7400")
136       .Case("7410", "7400")
137       .Case("7447", "7400")
138       .Case("7455", "7450")
139       .Case("G4", "g4")
140       .Case("POWER4", "970")
141       .Case("PPC970FX", "970")
142       .Case("PPC970MP", "970")
143       .Case("G5", "g5")
144       .Case("POWER5", "g5")
145       .Case("A2", "a2")
146       .Case("POWER6", "pwr6")
147       .Case("POWER7", "pwr7")
148       .Case("POWER8", "pwr8")
149       .Case("POWER8E", "pwr8")
150       .Case("POWER8NVL", "pwr8")
151       .Case("POWER9", "pwr9")
152       .Case("POWER10", "pwr10")
153       // FIXME: If we get a simulator or machine with the capabilities of
154       // mcpu=future, we should revisit this and add the name reported by the
155       // simulator/machine.
156       .Default(generic);
157 }
158 
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160   // The cpuid register on arm is not accessible from user space. On Linux,
161   // it is exposed through the /proc/cpuinfo file.
162 
163   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164   // in all cases.
165   SmallVector<StringRef, 32> Lines;
166   ProcCpuinfoContent.split(Lines, "\n");
167 
168   // Look for the CPU implementer line.
169   StringRef Implementer;
170   StringRef Hardware;
171   StringRef Part;
172   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173     if (Lines[I].startswith("CPU implementer"))
174       Implementer = Lines[I].substr(15).ltrim("\t :");
175     if (Lines[I].startswith("Hardware"))
176       Hardware = Lines[I].substr(8).ltrim("\t :");
177     if (Lines[I].startswith("CPU part"))
178       Part = Lines[I].substr(8).ltrim("\t :");
179   }
180 
181   if (Implementer == "0x41") { // ARM Ltd.
182     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185       return "cortex-a53";
186 
187 
188     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189     // values correspond to the "Part number" in the CP15/c0 register. The
190     // contents are specified in the various processor manuals.
191     // This corresponds to the Main ID Register in Technical Reference Manuals.
192     // and is used in programs like sys-utils
193     return StringSwitch<const char *>(Part)
194         .Case("0x926", "arm926ej-s")
195         .Case("0xb02", "mpcore")
196         .Case("0xb36", "arm1136j-s")
197         .Case("0xb56", "arm1156t2-s")
198         .Case("0xb76", "arm1176jz-s")
199         .Case("0xc08", "cortex-a8")
200         .Case("0xc09", "cortex-a9")
201         .Case("0xc0f", "cortex-a15")
202         .Case("0xc20", "cortex-m0")
203         .Case("0xc23", "cortex-m3")
204         .Case("0xc24", "cortex-m4")
205         .Case("0xd22", "cortex-m55")
206         .Case("0xd02", "cortex-a34")
207         .Case("0xd04", "cortex-a35")
208         .Case("0xd03", "cortex-a53")
209         .Case("0xd05", "cortex-a55")
210         .Case("0xd46", "cortex-a510")
211         .Case("0xd07", "cortex-a57")
212         .Case("0xd08", "cortex-a72")
213         .Case("0xd09", "cortex-a73")
214         .Case("0xd0a", "cortex-a75")
215         .Case("0xd0b", "cortex-a76")
216         .Case("0xd0d", "cortex-a77")
217         .Case("0xd41", "cortex-a78")
218         .Case("0xd47", "cortex-a710")
219         .Case("0xd4d", "cortex-a715")
220         .Case("0xd44", "cortex-x1")
221         .Case("0xd4c", "cortex-x1c")
222         .Case("0xd48", "cortex-x2")
223         .Case("0xd4e", "cortex-x3")
224         .Case("0xd0c", "neoverse-n1")
225         .Case("0xd49", "neoverse-n2")
226         .Case("0xd40", "neoverse-v1")
227         .Case("0xd4f", "neoverse-v2")
228         .Default("generic");
229   }
230 
231   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
232     return StringSwitch<const char *>(Part)
233       .Case("0x516", "thunderx2t99")
234       .Case("0x0516", "thunderx2t99")
235       .Case("0xaf", "thunderx2t99")
236       .Case("0x0af", "thunderx2t99")
237       .Case("0xa1", "thunderxt88")
238       .Case("0x0a1", "thunderxt88")
239       .Default("generic");
240   }
241 
242   if (Implementer == "0x46") { // Fujitsu Ltd.
243     return StringSwitch<const char *>(Part)
244       .Case("0x001", "a64fx")
245       .Default("generic");
246   }
247 
248   if (Implementer == "0x4e") { // NVIDIA Corporation
249     return StringSwitch<const char *>(Part)
250         .Case("0x004", "carmel")
251         .Default("generic");
252   }
253 
254   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
255     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
256     // values correspond to the "Part number" in the CP15/c0 register. The
257     // contents are specified in the various processor manuals.
258     return StringSwitch<const char *>(Part)
259       .Case("0xd01", "tsv110")
260       .Default("generic");
261 
262   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
263     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
264     // values correspond to the "Part number" in the CP15/c0 register. The
265     // contents are specified in the various processor manuals.
266     return StringSwitch<const char *>(Part)
267         .Case("0x06f", "krait") // APQ8064
268         .Case("0x201", "kryo")
269         .Case("0x205", "kryo")
270         .Case("0x211", "kryo")
271         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
272         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
273         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
274         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
275         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
276         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
277         .Case("0xc00", "falkor")
278         .Case("0xc01", "saphira")
279         .Default("generic");
280   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
281     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
282     // any predictive pattern across variants and parts.
283     unsigned Variant = 0, Part = 0;
284 
285     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
286     // number, corresponding to the Variant bits in the CP15/C0 register.
287     for (auto I : Lines)
288       if (I.consume_front("CPU variant"))
289         I.ltrim("\t :").getAsInteger(0, Variant);
290 
291     // Look for the CPU part line, whose value is a 3 digit hexadecimal
292     // number, corresponding to the PartNum bits in the CP15/C0 register.
293     for (auto I : Lines)
294       if (I.consume_front("CPU part"))
295         I.ltrim("\t :").getAsInteger(0, Part);
296 
297     unsigned Exynos = (Variant << 12) | Part;
298     switch (Exynos) {
299     default:
300       // Default by falling through to Exynos M3.
301       [[fallthrough]];
302     case 0x1002:
303       return "exynos-m3";
304     case 0x1003:
305       return "exynos-m4";
306     }
307   }
308 
309   if (Implementer == "0xc0") { // Ampere Computing
310     return StringSwitch<const char *>(Part)
311         .Case("0xac3", "ampere1")
312         .Case("0xac4", "ampere1a")
313         .Default("generic");
314   }
315 
316   return "generic";
317 }
318 
319 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)320 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
321   switch (Id) {
322     case 2064:  // z900 not supported by LLVM
323     case 2066:
324     case 2084:  // z990 not supported by LLVM
325     case 2086:
326     case 2094:  // z9-109 not supported by LLVM
327     case 2096:
328       return "generic";
329     case 2097:
330     case 2098:
331       return "z10";
332     case 2817:
333     case 2818:
334       return "z196";
335     case 2827:
336     case 2828:
337       return "zEC12";
338     case 2964:
339     case 2965:
340       return HaveVectorSupport? "z13" : "zEC12";
341     case 3906:
342     case 3907:
343       return HaveVectorSupport? "z14" : "zEC12";
344     case 8561:
345     case 8562:
346       return HaveVectorSupport? "z15" : "zEC12";
347     case 3931:
348     case 3932:
349     default:
350       return HaveVectorSupport? "z16" : "zEC12";
351   }
352 }
353 } // end anonymous namespace
354 
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)355 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
356   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
357 
358   // The "processor 0:" line comes after a fair amount of other information,
359   // including a cache breakdown, but this should be plenty.
360   SmallVector<StringRef, 32> Lines;
361   ProcCpuinfoContent.split(Lines, "\n");
362 
363   // Look for the CPU features.
364   SmallVector<StringRef, 32> CPUFeatures;
365   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
366     if (Lines[I].startswith("features")) {
367       size_t Pos = Lines[I].find(':');
368       if (Pos != StringRef::npos) {
369         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
370         break;
371       }
372     }
373 
374   // We need to check for the presence of vector support independently of
375   // the machine type, since we may only use the vector register set when
376   // supported by the kernel (and hypervisor).
377   bool HaveVectorSupport = false;
378   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
379     if (CPUFeatures[I] == "vx")
380       HaveVectorSupport = true;
381   }
382 
383   // Now check the processor machine type.
384   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
385     if (Lines[I].startswith("processor ")) {
386       size_t Pos = Lines[I].find("machine = ");
387       if (Pos != StringRef::npos) {
388         Pos += sizeof("machine = ") - 1;
389         unsigned int Id;
390         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
391           return getCPUNameFromS390Model(Id, HaveVectorSupport);
392       }
393       break;
394     }
395   }
396 
397   return "generic";
398 }
399 
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)400 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
401   // There are 24 lines in /proc/cpuinfo
402   SmallVector<StringRef> Lines;
403   ProcCpuinfoContent.split(Lines, "\n");
404 
405   // Look for uarch line to determine cpu name
406   StringRef UArch;
407   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
408     if (Lines[I].startswith("uarch")) {
409       UArch = Lines[I].substr(5).ltrim("\t :");
410       break;
411     }
412   }
413 
414   return StringSwitch<const char *>(UArch)
415       .Case("sifive,u74-mc", "sifive-u74")
416       .Case("sifive,bullet0", "sifive-u74")
417       .Default("generic");
418 }
419 
getHostCPUNameForBPF()420 StringRef sys::detail::getHostCPUNameForBPF() {
421 #if !defined(__linux__) || !defined(__x86_64__)
422   return "generic";
423 #else
424   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
425       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
426     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
427       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
428       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
429       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
430       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
431       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
432       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433       /* BPF_EXIT_INSN() */
434       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
435 
436   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
437       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445       /* BPF_EXIT_INSN() */
446       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
447 
448   struct bpf_prog_load_attr {
449     uint32_t prog_type;
450     uint32_t insn_cnt;
451     uint64_t insns;
452     uint64_t license;
453     uint32_t log_level;
454     uint32_t log_size;
455     uint64_t log_buf;
456     uint32_t kern_version;
457     uint32_t prog_flags;
458   } attr = {};
459   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
460   attr.insn_cnt = 5;
461   attr.insns = (uint64_t)v3_insns;
462   attr.license = (uint64_t)"DUMMY";
463 
464   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
465                    sizeof(attr));
466   if (fd >= 0) {
467     close(fd);
468     return "v3";
469   }
470 
471   /* Clear the whole attr in case its content changed by syscall. */
472   memset(&attr, 0, sizeof(attr));
473   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
474   attr.insn_cnt = 5;
475   attr.insns = (uint64_t)v2_insns;
476   attr.license = (uint64_t)"DUMMY";
477   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
478   if (fd >= 0) {
479     close(fd);
480     return "v2";
481   }
482   return "v1";
483 #endif
484 }
485 
486 #if defined(__i386__) || defined(_M_IX86) || \
487     defined(__x86_64__) || defined(_M_X64)
488 
// Reports whether the CPUID instruction can be executed on this host.
//
// The i386 probe was copied from clang's cpuid.h (__get_cpuid_max). It was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID support;
// consequently, for i386, the presence of CPUID is checked first via the
// corresponding eflags bit. The cpuid.h header itself was dropped because of
// PR30384: the header and __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects, e.g. libstdcxx.
//
// Every configuration other than GCC/Clang targeting i386 simply reports true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Flip bit 0x00200000 of EFLAGS, read the register back and compare it with
  // the original value: the result is 1 if the two differ and 0 otherwise.
  int FlagToggled;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  return FlagToggled != 0;
#else
  return true;
#endif
}
523 
/// getX86CpuIDAndInfo - Execute CPUID for leaf \p value and store the
/// resulting EAX, EBX, ECX and EDX through \p rEAX, \p rEBX, \p rECX and
/// \p rEDX.
///
/// \returns false on success, true if we can't run cpuid on the host (in which
/// case the output locations are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually: park
  // rbx in rsi, run cpuid, then swap so that rsi carries cpuid's EBX result
  // and rbx has its old value back.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // 32-bit flavour of the sequence above.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = static_cast<unsigned>(Regs[0]);
  *rEBX = static_cast<unsigned>(Regs[1]);
  *rECX = static_cast<unsigned>(Regs[2]);
  *rEDX = static_cast<unsigned>(Regs[3]);
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
561 
562 namespace llvm {
563 namespace sys {
564 namespace detail {
565 namespace x86 {
566 
getVendorSignature(unsigned * MaxLeaf)567 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
568   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
569   if (MaxLeaf == nullptr)
570     MaxLeaf = &EAX;
571   else
572     *MaxLeaf = 0;
573 
574   if (!isCpuIdSupported())
575     return VendorSignatures::UNKNOWN;
576 
577   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
578     return VendorSignatures::UNKNOWN;
579 
580   // "Genu ineI ntel"
581   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
582     return VendorSignatures::GENUINE_INTEL;
583 
584   // "Auth enti cAMD"
585   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
586     return VendorSignatures::AUTHENTIC_AMD;
587 
588   return VendorSignatures::UNKNOWN;
589 }
590 
591 } // namespace x86
592 } // namespace detail
593 } // namespace sys
594 } // namespace llvm
595 
596 using namespace llvm::sys::detail::x86;
597 
/// getX86CpuIDAndInfoEx - Execute CPUID for leaf \p value with sub-leaf
/// \p subleaf and store the resulting EAX, EBX, ECX and EDX through \p rEAX,
/// \p rEBX, \p rECX and \p rEDX.
///
/// \returns false on success, true if we can't run cpuid on the host (in which
/// case the output locations are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually: park
  // rbx in rsi, run cpuid, then swap so that rsi carries cpuid's EBX result
  // and rbx has its old value back.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // 32-bit flavour of the sequence above.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = static_cast<unsigned>(Regs[0]);
  *rEBX = static_cast<unsigned>(Regs[1]);
  *rECX = static_cast<unsigned>(Regs[2]);
  *rEDX = static_cast<unsigned>(Regs[3]);
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
636 
// Read control register 0 (XCR0). Used to detect features such as AVX.
// The low half of the register goes to *rEAX and the high half to *rEDX.
// Returns false on success and true when this build has no way to read it.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  unsigned Lo, Hi;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Lo), "=d"(Hi) : "c"(0));
  *rEAX = Lo;
  *rEDX = Hi;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
654 
/// Decodes the family and model numbers from a CPUID signature word.
///
/// \param EAX the signature word to decode.
/// \param Family receives the family; the extended family field is added
///        when the base family is 0xF.
/// \param Model receives the model; the extended model field supplies the
///        upper four bits when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  // Examine extended family ID if family ID is F.
  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;

  // Examine extended model ID if family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
667 
668 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)669 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
670                                 const unsigned *Features,
671                                 unsigned *Type, unsigned *Subtype) {
672   auto testFeature = [&](unsigned F) {
673     return (Features[F / 32] & (1U << (F % 32))) != 0;
674   };
675 
676   StringRef CPU;
677 
678   switch (Family) {
679   case 3:
680     CPU = "i386";
681     break;
682   case 4:
683     CPU = "i486";
684     break;
685   case 5:
686     if (testFeature(X86::FEATURE_MMX)) {
687       CPU = "pentium-mmx";
688       break;
689     }
690     CPU = "pentium";
691     break;
692   case 6:
693     switch (Model) {
694     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
695                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
696                // mobile processor, Intel Core 2 Extreme processor, Intel
697                // Pentium Dual-Core processor, Intel Xeon processor, model
698                // 0Fh. All processors are manufactured using the 65 nm process.
699     case 0x16: // Intel Celeron processor model 16h. All processors are
700                // manufactured using the 65 nm process
701       CPU = "core2";
702       *Type = X86::INTEL_CORE2;
703       break;
704     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
705                // 17h. All processors are manufactured using the 45 nm process.
706                //
707                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
708     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
709                // the 45 nm process.
710       CPU = "penryn";
711       *Type = X86::INTEL_CORE2;
712       break;
713     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
714                // processors are manufactured using the 45 nm process.
715     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
716                // As found in a Summer 2010 model iMac.
717     case 0x1f:
718     case 0x2e:              // Nehalem EX
719       CPU = "nehalem";
720       *Type = X86::INTEL_COREI7;
721       *Subtype = X86::INTEL_COREI7_NEHALEM;
722       break;
723     case 0x25: // Intel Core i7, laptop version.
724     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
725                // processors are manufactured using the 32 nm process.
726     case 0x2f: // Westmere EX
727       CPU = "westmere";
728       *Type = X86::INTEL_COREI7;
729       *Subtype = X86::INTEL_COREI7_WESTMERE;
730       break;
731     case 0x2a: // Intel Core i7 processor. All processors are manufactured
732                // using the 32 nm process.
733     case 0x2d:
734       CPU = "sandybridge";
735       *Type = X86::INTEL_COREI7;
736       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
737       break;
738     case 0x3a:
739     case 0x3e:              // Ivy Bridge EP
740       CPU = "ivybridge";
741       *Type = X86::INTEL_COREI7;
742       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
743       break;
744 
745     // Haswell:
746     case 0x3c:
747     case 0x3f:
748     case 0x45:
749     case 0x46:
750       CPU = "haswell";
751       *Type = X86::INTEL_COREI7;
752       *Subtype = X86::INTEL_COREI7_HASWELL;
753       break;
754 
755     // Broadwell:
756     case 0x3d:
757     case 0x47:
758     case 0x4f:
759     case 0x56:
760       CPU = "broadwell";
761       *Type = X86::INTEL_COREI7;
762       *Subtype = X86::INTEL_COREI7_BROADWELL;
763       break;
764 
765     // Skylake:
766     case 0x4e:              // Skylake mobile
767     case 0x5e:              // Skylake desktop
768     case 0x8e:              // Kaby Lake mobile
769     case 0x9e:              // Kaby Lake desktop
770     case 0xa5:              // Comet Lake-H/S
771     case 0xa6:              // Comet Lake-U
772       CPU = "skylake";
773       *Type = X86::INTEL_COREI7;
774       *Subtype = X86::INTEL_COREI7_SKYLAKE;
775       break;
776 
777     // Rocketlake:
778     case 0xa7:
779       CPU = "rocketlake";
780       *Type = X86::INTEL_COREI7;
781       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
782       break;
783 
784     // Skylake Xeon:
785     case 0x55:
786       *Type = X86::INTEL_COREI7;
787       if (testFeature(X86::FEATURE_AVX512BF16)) {
788         CPU = "cooperlake";
789         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
790       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
791         CPU = "cascadelake";
792         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
793       } else {
794         CPU = "skylake-avx512";
795         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
796       }
797       break;
798 
799     // Cannonlake:
800     case 0x66:
801       CPU = "cannonlake";
802       *Type = X86::INTEL_COREI7;
803       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
804       break;
805 
806     // Icelake:
807     case 0x7d:
808     case 0x7e:
809       CPU = "icelake-client";
810       *Type = X86::INTEL_COREI7;
811       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
812       break;
813 
814     // Tigerlake:
815     case 0x8c:
816     case 0x8d:
817       CPU = "tigerlake";
818       *Type = X86::INTEL_COREI7;
819       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
820       break;
821 
822     // Alderlake:
823     case 0x97:
824     case 0x9a:
825     // Raptorlake:
826     case 0xb7:
827     // Meteorlake:
828     case 0xaa:
829     case 0xac:
830       CPU = "alderlake";
831       *Type = X86::INTEL_COREI7;
832       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
833       break;
834 
835     // Graniterapids:
836     case 0xae:
837     case 0xad:
838       CPU = "graniterapids";
839       *Type = X86::INTEL_COREI7;
840       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
841       break;
842 
843     // Icelake Xeon:
844     case 0x6a:
845     case 0x6c:
846       CPU = "icelake-server";
847       *Type = X86::INTEL_COREI7;
848       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
849       break;
850 
851     // Emerald Rapids:
852     case 0xcf:
853     // Sapphire Rapids:
854     case 0x8f:
855       CPU = "sapphirerapids";
856       *Type = X86::INTEL_COREI7;
857       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
858       break;
859 
860     case 0x1c: // Most 45 nm Intel Atom processors
861     case 0x26: // 45 nm Atom Lincroft
862     case 0x27: // 32 nm Atom Medfield
863     case 0x35: // 32 nm Atom Midview
864     case 0x36: // 32 nm Atom Midview
865       CPU = "bonnell";
866       *Type = X86::INTEL_BONNELL;
867       break;
868 
869     // Atom Silvermont codes from the Intel software optimization guide.
870     case 0x37:
871     case 0x4a:
872     case 0x4d:
873     case 0x5a:
874     case 0x5d:
875     case 0x4c: // really airmont
876       CPU = "silvermont";
877       *Type = X86::INTEL_SILVERMONT;
878       break;
879     // Goldmont:
880     case 0x5c: // Apollo Lake
881     case 0x5f: // Denverton
882       CPU = "goldmont";
883       *Type = X86::INTEL_GOLDMONT;
884       break;
885     case 0x7a:
886       CPU = "goldmont-plus";
887       *Type = X86::INTEL_GOLDMONT_PLUS;
888       break;
889     case 0x86:
890       CPU = "tremont";
891       *Type = X86::INTEL_TREMONT;
892       break;
893 
894     // Sierraforest:
895     case 0xaf:
896       CPU = "sierraforest";
897       *Type = X86::INTEL_SIERRAFOREST;
898       break;
899 
900     // Grandridge:
901     case 0xb6:
902       CPU = "grandridge";
903       *Type = X86::INTEL_GRANDRIDGE;
904       break;
905 
906     // Xeon Phi (Knights Landing + Knights Mill):
907     case 0x57:
908       CPU = "knl";
909       *Type = X86::INTEL_KNL;
910       break;
911     case 0x85:
912       CPU = "knm";
913       *Type = X86::INTEL_KNM;
914       break;
915 
916     default: // Unknown family 6 CPU, try to guess.
917       // Don't bother with Type/Subtype here, they aren't used by the caller.
918       // They're used above to keep the code in sync with compiler-rt.
919       // TODO detect tigerlake host from model
920       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
921         CPU = "tigerlake";
922       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
923         CPU = "icelake-client";
924       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
925         CPU = "cannonlake";
926       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
927         CPU = "cooperlake";
928       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
929         CPU = "cascadelake";
930       } else if (testFeature(X86::FEATURE_AVX512VL)) {
931         CPU = "skylake-avx512";
932       } else if (testFeature(X86::FEATURE_AVX512ER)) {
933         CPU = "knl";
934       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
935         if (testFeature(X86::FEATURE_SHA))
936           CPU = "goldmont";
937         else
938           CPU = "skylake";
939       } else if (testFeature(X86::FEATURE_ADX)) {
940         CPU = "broadwell";
941       } else if (testFeature(X86::FEATURE_AVX2)) {
942         CPU = "haswell";
943       } else if (testFeature(X86::FEATURE_AVX)) {
944         CPU = "sandybridge";
945       } else if (testFeature(X86::FEATURE_SSE4_2)) {
946         if (testFeature(X86::FEATURE_MOVBE))
947           CPU = "silvermont";
948         else
949           CPU = "nehalem";
950       } else if (testFeature(X86::FEATURE_SSE4_1)) {
951         CPU = "penryn";
952       } else if (testFeature(X86::FEATURE_SSSE3)) {
953         if (testFeature(X86::FEATURE_MOVBE))
954           CPU = "bonnell";
955         else
956           CPU = "core2";
957       } else if (testFeature(X86::FEATURE_64BIT)) {
958         CPU = "core2";
959       } else if (testFeature(X86::FEATURE_SSE3)) {
960         CPU = "yonah";
961       } else if (testFeature(X86::FEATURE_SSE2)) {
962         CPU = "pentium-m";
963       } else if (testFeature(X86::FEATURE_SSE)) {
964         CPU = "pentium3";
965       } else if (testFeature(X86::FEATURE_MMX)) {
966         CPU = "pentium2";
967       } else {
968         CPU = "pentiumpro";
969       }
970       break;
971     }
972     break;
973   case 15: {
974     if (testFeature(X86::FEATURE_64BIT)) {
975       CPU = "nocona";
976       break;
977     }
978     if (testFeature(X86::FEATURE_SSE3)) {
979       CPU = "prescott";
980       break;
981     }
982     CPU = "pentium4";
983     break;
984   }
985   default:
986     break; // Unknown.
987   }
988 
989   return CPU;
990 }
991 
992 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)993 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
994                               const unsigned *Features,
995                               unsigned *Type, unsigned *Subtype) {
996   auto testFeature = [&](unsigned F) {
997     return (Features[F / 32] & (1U << (F % 32))) != 0;
998   };
999 
1000   StringRef CPU;
1001 
1002   switch (Family) {
1003   case 4:
1004     CPU = "i486";
1005     break;
1006   case 5:
1007     CPU = "pentium";
1008     switch (Model) {
1009     case 6:
1010     case 7:
1011       CPU = "k6";
1012       break;
1013     case 8:
1014       CPU = "k6-2";
1015       break;
1016     case 9:
1017     case 13:
1018       CPU = "k6-3";
1019       break;
1020     case 10:
1021       CPU = "geode";
1022       break;
1023     }
1024     break;
1025   case 6:
1026     if (testFeature(X86::FEATURE_SSE)) {
1027       CPU = "athlon-xp";
1028       break;
1029     }
1030     CPU = "athlon";
1031     break;
1032   case 15:
1033     if (testFeature(X86::FEATURE_SSE3)) {
1034       CPU = "k8-sse3";
1035       break;
1036     }
1037     CPU = "k8";
1038     break;
1039   case 16:
1040     CPU = "amdfam10";
1041     *Type = X86::AMDFAM10H; // "amdfam10"
1042     switch (Model) {
1043     case 2:
1044       *Subtype = X86::AMDFAM10H_BARCELONA;
1045       break;
1046     case 4:
1047       *Subtype = X86::AMDFAM10H_SHANGHAI;
1048       break;
1049     case 8:
1050       *Subtype = X86::AMDFAM10H_ISTANBUL;
1051       break;
1052     }
1053     break;
1054   case 20:
1055     CPU = "btver1";
1056     *Type = X86::AMD_BTVER1;
1057     break;
1058   case 21:
1059     CPU = "bdver1";
1060     *Type = X86::AMDFAM15H;
1061     if (Model >= 0x60 && Model <= 0x7f) {
1062       CPU = "bdver4";
1063       *Subtype = X86::AMDFAM15H_BDVER4;
1064       break; // 60h-7Fh: Excavator
1065     }
1066     if (Model >= 0x30 && Model <= 0x3f) {
1067       CPU = "bdver3";
1068       *Subtype = X86::AMDFAM15H_BDVER3;
1069       break; // 30h-3Fh: Steamroller
1070     }
1071     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1072       CPU = "bdver2";
1073       *Subtype = X86::AMDFAM15H_BDVER2;
1074       break; // 02h, 10h-1Fh: Piledriver
1075     }
1076     if (Model <= 0x0f) {
1077       *Subtype = X86::AMDFAM15H_BDVER1;
1078       break; // 00h-0Fh: Bulldozer
1079     }
1080     break;
1081   case 22:
1082     CPU = "btver2";
1083     *Type = X86::AMD_BTVER2;
1084     break;
1085   case 23:
1086     CPU = "znver1";
1087     *Type = X86::AMDFAM17H;
1088     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1089       CPU = "znver2";
1090       *Subtype = X86::AMDFAM17H_ZNVER2;
1091       break; // 30h-3fh, 71h: Zen2
1092     }
1093     if (Model <= 0x0f) {
1094       *Subtype = X86::AMDFAM17H_ZNVER1;
1095       break; // 00h-0Fh: Zen1
1096     }
1097     break;
1098   case 25:
1099     CPU = "znver3";
1100     *Type = X86::AMDFAM19H;
1101     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
1102       // Family 19h Models 00h-0Fh - Zen3
1103       // Family 19h Models 20h-2Fh - Zen3
1104       // Family 19h Models 30h-3Fh - Zen3
1105       // Family 19h Models 40h-4Fh - Zen3+
1106       // Family 19h Models 50h-5Fh - Zen3+
1107       *Subtype = X86::AMDFAM19H_ZNVER3;
1108       break;
1109     }
1110     if ((Model >= 0x10 && Model <= 0x1f) ||
1111         (Model >= 0x60 && Model <= 0x74) ||
1112         (Model >= 0x78 && Model <= 0x7b) ||
1113         (Model >= 0xA0 && Model <= 0xAf)) {
1114       CPU = "znver4";
1115       *Subtype = X86::AMDFAM19H_ZNVER4;
1116       break; //  "znver4"
1117     }
1118     break; // family 19h
1119   default:
1120     break; // Unknown AMD CPU.
1121   }
1122 
1123   return CPU;
1124 }
1125 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1126 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1127                                  unsigned *Features) {
1128   unsigned EAX, EBX;
1129 
1130   auto setFeature = [&](unsigned F) {
1131     Features[F / 32] |= 1U << (F % 32);
1132   };
1133 
1134   if ((EDX >> 15) & 1)
1135     setFeature(X86::FEATURE_CMOV);
1136   if ((EDX >> 23) & 1)
1137     setFeature(X86::FEATURE_MMX);
1138   if ((EDX >> 25) & 1)
1139     setFeature(X86::FEATURE_SSE);
1140   if ((EDX >> 26) & 1)
1141     setFeature(X86::FEATURE_SSE2);
1142 
1143   if ((ECX >> 0) & 1)
1144     setFeature(X86::FEATURE_SSE3);
1145   if ((ECX >> 1) & 1)
1146     setFeature(X86::FEATURE_PCLMUL);
1147   if ((ECX >> 9) & 1)
1148     setFeature(X86::FEATURE_SSSE3);
1149   if ((ECX >> 12) & 1)
1150     setFeature(X86::FEATURE_FMA);
1151   if ((ECX >> 19) & 1)
1152     setFeature(X86::FEATURE_SSE4_1);
1153   if ((ECX >> 20) & 1) {
1154     setFeature(X86::FEATURE_SSE4_2);
1155     setFeature(X86::FEATURE_CRC32);
1156   }
1157   if ((ECX >> 23) & 1)
1158     setFeature(X86::FEATURE_POPCNT);
1159   if ((ECX >> 25) & 1)
1160     setFeature(X86::FEATURE_AES);
1161 
1162   if ((ECX >> 22) & 1)
1163     setFeature(X86::FEATURE_MOVBE);
1164 
1165   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1166   // indicates that the AVX registers will be saved and restored on context
1167   // switch, then we have full AVX support.
1168   const unsigned AVXBits = (1 << 27) | (1 << 28);
1169   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1170                 ((EAX & 0x6) == 0x6);
1171 #if defined(__APPLE__)
1172   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1173   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1174   // set right now.
1175   bool HasAVX512Save = true;
1176 #else
1177   // AVX512 requires additional context to be saved by the OS.
1178   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1179 #endif
1180 
1181   if (HasAVX)
1182     setFeature(X86::FEATURE_AVX);
1183 
1184   bool HasLeaf7 =
1185       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1186 
1187   if (HasLeaf7 && ((EBX >> 3) & 1))
1188     setFeature(X86::FEATURE_BMI);
1189   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1190     setFeature(X86::FEATURE_AVX2);
1191   if (HasLeaf7 && ((EBX >> 8) & 1))
1192     setFeature(X86::FEATURE_BMI2);
1193   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1194     setFeature(X86::FEATURE_AVX512F);
1195   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1196     setFeature(X86::FEATURE_AVX512DQ);
1197   if (HasLeaf7 && ((EBX >> 19) & 1))
1198     setFeature(X86::FEATURE_ADX);
1199   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1200     setFeature(X86::FEATURE_AVX512IFMA);
1201   if (HasLeaf7 && ((EBX >> 23) & 1))
1202     setFeature(X86::FEATURE_CLFLUSHOPT);
1203   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1204     setFeature(X86::FEATURE_AVX512PF);
1205   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1206     setFeature(X86::FEATURE_AVX512ER);
1207   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1208     setFeature(X86::FEATURE_AVX512CD);
1209   if (HasLeaf7 && ((EBX >> 29) & 1))
1210     setFeature(X86::FEATURE_SHA);
1211   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1212     setFeature(X86::FEATURE_AVX512BW);
1213   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1214     setFeature(X86::FEATURE_AVX512VL);
1215 
1216   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1217     setFeature(X86::FEATURE_AVX512VBMI);
1218   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1219     setFeature(X86::FEATURE_AVX512VBMI2);
1220   if (HasLeaf7 && ((ECX >> 8) & 1))
1221     setFeature(X86::FEATURE_GFNI);
1222   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1223     setFeature(X86::FEATURE_VPCLMULQDQ);
1224   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1225     setFeature(X86::FEATURE_AVX512VNNI);
1226   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1227     setFeature(X86::FEATURE_AVX512BITALG);
1228   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1229     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1230 
1231   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1232     setFeature(X86::FEATURE_AVX5124VNNIW);
1233   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1234     setFeature(X86::FEATURE_AVX5124FMAPS);
1235   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1236     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1237 
1238   bool HasLeaf7Subleaf1 =
1239       MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1240   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1241     setFeature(X86::FEATURE_AVX512BF16);
1242 
1243   unsigned MaxExtLevel;
1244   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1245 
1246   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1247                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1248   if (HasExtLeaf1 && ((ECX >> 6) & 1))
1249     setFeature(X86::FEATURE_SSE4_A);
1250   if (HasExtLeaf1 && ((ECX >> 11) & 1))
1251     setFeature(X86::FEATURE_XOP);
1252   if (HasExtLeaf1 && ((ECX >> 16) & 1))
1253     setFeature(X86::FEATURE_FMA4);
1254 
1255   if (HasExtLeaf1 && ((EDX >> 29) & 1))
1256     setFeature(X86::FEATURE_64BIT);
1257 }
1258 
getHostCPUName()1259 StringRef sys::getHostCPUName() {
1260   unsigned MaxLeaf = 0;
1261   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1262   if (Vendor == VendorSignatures::UNKNOWN)
1263     return "generic";
1264 
1265   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1266   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1267 
1268   unsigned Family = 0, Model = 0;
1269   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1270   detectX86FamilyModel(EAX, &Family, &Model);
1271   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1272 
1273   // These aren't consumed in this file, but we try to keep some source code the
1274   // same or similar to compiler-rt.
1275   unsigned Type = 0;
1276   unsigned Subtype = 0;
1277 
1278   StringRef CPU;
1279 
1280   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1281     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1282                                           &Subtype);
1283   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1284     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1285                                         &Subtype);
1286   }
1287 
1288   if (!CPU.empty())
1289     return CPU;
1290 
1291   return "generic";
1292 }
1293 
1294 #elif defined(__APPLE__) && defined(__powerpc__)
getHostCPUName()1295 StringRef sys::getHostCPUName() {
1296   host_basic_info_data_t hostInfo;
1297   mach_msg_type_number_t infoCount;
1298 
1299   infoCount = HOST_BASIC_INFO_COUNT;
1300   mach_port_t hostPort = mach_host_self();
1301   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1302             &infoCount);
1303   mach_port_deallocate(mach_task_self(), hostPort);
1304 
1305   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1306     return "generic";
1307 
1308   switch (hostInfo.cpu_subtype) {
1309   case CPU_SUBTYPE_POWERPC_601:
1310     return "601";
1311   case CPU_SUBTYPE_POWERPC_602:
1312     return "602";
1313   case CPU_SUBTYPE_POWERPC_603:
1314     return "603";
1315   case CPU_SUBTYPE_POWERPC_603e:
1316     return "603e";
1317   case CPU_SUBTYPE_POWERPC_603ev:
1318     return "603ev";
1319   case CPU_SUBTYPE_POWERPC_604:
1320     return "604";
1321   case CPU_SUBTYPE_POWERPC_604e:
1322     return "604e";
1323   case CPU_SUBTYPE_POWERPC_620:
1324     return "620";
1325   case CPU_SUBTYPE_POWERPC_750:
1326     return "750";
1327   case CPU_SUBTYPE_POWERPC_7400:
1328     return "7400";
1329   case CPU_SUBTYPE_POWERPC_7450:
1330     return "7450";
1331   case CPU_SUBTYPE_POWERPC_970:
1332     return "970";
1333   default:;
1334   }
1335 
1336   return "generic";
1337 }
1338 #elif defined(__linux__) && defined(__powerpc__)
getHostCPUName()1339 StringRef sys::getHostCPUName() {
1340   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1341   StringRef Content = P ? P->getBuffer() : "";
1342   return detail::getHostCPUNameForPowerPC(Content);
1343 }
1344 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1345 StringRef sys::getHostCPUName() {
1346   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1347   StringRef Content = P ? P->getBuffer() : "";
1348   return detail::getHostCPUNameForARM(Content);
1349 }
1350 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1351 StringRef sys::getHostCPUName() {
1352   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1353   StringRef Content = P ? P->getBuffer() : "";
1354   return detail::getHostCPUNameForS390x(Content);
1355 }
1356 #elif defined(__MVS__)
getHostCPUName()1357 StringRef sys::getHostCPUName() {
1358   // Get pointer to Communications Vector Table (CVT).
1359   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1360   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1361   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1362   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1363   // of address.
1364   int ReadValue = *StartToCVTOffset;
1365   // Explicitly clear the high order bit.
1366   ReadValue = (ReadValue & 0x7FFFFFFF);
1367   char *CVT = reinterpret_cast<char *>(ReadValue);
1368   // The model number is located in the CVT prefix at offset -6 and stored as
1369   // signless packed decimal.
1370   uint16_t Id = *(uint16_t *)&CVT[-6];
1371   // Convert number to integer.
1372   Id = decodePackedBCD<uint16_t>(Id, false);
1373   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1374   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1375   // extension can only be used if bit CVTVEF is on.
1376   bool HaveVectorSupport = CVT[244] & 0x80;
1377   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1378 }
1379 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1380 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1381 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1382 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1383 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1384 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1385 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1386 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1387 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1388 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1389 
getHostCPUName()1390 StringRef sys::getHostCPUName() {
1391   uint32_t Family;
1392   size_t Length = sizeof(Family);
1393   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1394 
1395   switch (Family) {
1396   case CPUFAMILY_ARM_SWIFT:
1397     return "swift";
1398   case CPUFAMILY_ARM_CYCLONE:
1399     return "apple-a7";
1400   case CPUFAMILY_ARM_TYPHOON:
1401     return "apple-a8";
1402   case CPUFAMILY_ARM_TWISTER:
1403     return "apple-a9";
1404   case CPUFAMILY_ARM_HURRICANE:
1405     return "apple-a10";
1406   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1407     return "apple-a11";
1408   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1409     return "apple-a12";
1410   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1411     return "apple-a13";
1412   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1413     return "apple-m1";
1414   default:
1415     // Default to the newest CPU we know about.
1416     return "apple-m1";
1417   }
1418 }
1419 #elif defined(_AIX)
getHostCPUName()1420 StringRef sys::getHostCPUName() {
1421   switch (_system_configuration.implementation) {
1422   case POWER_4:
1423     if (_system_configuration.version == PV_4_3)
1424       return "970";
1425     return "pwr4";
1426   case POWER_5:
1427     if (_system_configuration.version == PV_5)
1428       return "pwr5";
1429     return "pwr5x";
1430   case POWER_6:
1431     if (_system_configuration.version == PV_6_Compat)
1432       return "pwr6";
1433     return "pwr6x";
1434   case POWER_7:
1435     return "pwr7";
1436   case POWER_8:
1437     return "pwr8";
1438   case POWER_9:
1439     return "pwr9";
1440 // TODO: simplify this once the macro is available in all OS levels.
1441 #ifdef POWER_10
1442   case POWER_10:
1443 #else
1444   case 0x40000:
1445 #endif
1446     return "pwr10";
1447   default:
1448     return "generic";
1449   }
1450 }
1451 #elif defined(__riscv)
getHostCPUName()1452 StringRef sys::getHostCPUName() {
1453 #if defined(__linux__)
1454   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1455   StringRef Content = P ? P->getBuffer() : "";
1456   return detail::getHostCPUNameForRISCV(Content);
1457 #else
1458 #if __riscv_xlen == 64
1459   return "generic-rv64";
1460 #elif __riscv_xlen == 32
1461   return "generic-rv32";
1462 #else
1463 #error "Unhandled value of __riscv_xlen"
1464 #endif
1465 #endif
1466 }
1467 #elif defined(__sparc__)
1468 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1469 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1470   SmallVector<StringRef> Lines;
1471   ProcCpuinfoContent.split(Lines, "\n");
1472 
1473   // Look for cpu line to determine cpu name
1474   StringRef Cpu;
1475   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1476     if (Lines[I].startswith("cpu")) {
1477       Cpu = Lines[I].substr(5).ltrim("\t :");
1478       break;
1479     }
1480   }
1481 
1482   return StringSwitch<const char *>(Cpu)
1483       .StartsWith("SuperSparc", "supersparc")
1484       .StartsWith("HyperSparc", "hypersparc")
1485       .StartsWith("SpitFire", "ultrasparc")
1486       .StartsWith("BlackBird", "ultrasparc")
1487       .StartsWith("Sabre", " ultrasparc")
1488       .StartsWith("Hummingbird", "ultrasparc")
1489       .StartsWith("Cheetah", "ultrasparc3")
1490       .StartsWith("Jalapeno", "ultrasparc3")
1491       .StartsWith("Jaguar", "ultrasparc3")
1492       .StartsWith("Panther", "ultrasparc3")
1493       .StartsWith("Serrano", "ultrasparc3")
1494       .StartsWith("UltraSparc T1", "niagara")
1495       .StartsWith("UltraSparc T2", "niagara2")
1496       .StartsWith("UltraSparc T3", "niagara3")
1497       .StartsWith("UltraSparc T4", "niagara4")
1498       .StartsWith("UltraSparc T5", "niagara4")
1499       .StartsWith("LEON", "leon3")
1500       // niagara7/m8 not supported by LLVM yet.
1501       .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1502       .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1503       .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1504       .Default("generic");
1505 }
1506 #endif
1507 
getHostCPUName()1508 StringRef sys::getHostCPUName() {
1509 #if defined(__linux__)
1510   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1511   StringRef Content = P ? P->getBuffer() : "";
1512   return detail::getHostCPUNameForSPARC(Content);
1513 #elif defined(__sun__) && defined(__svr4__)
1514   char *buf = NULL;
1515   kstat_ctl_t *kc;
1516   kstat_t *ksp;
1517   kstat_named_t *brand = NULL;
1518 
1519   kc = kstat_open();
1520   if (kc != NULL) {
1521     ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1522     if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1523         ksp->ks_type == KSTAT_TYPE_NAMED)
1524       brand =
1525           (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1526     if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1527       buf = KSTAT_NAMED_STR_PTR(brand);
1528   }
1529   kstat_close(kc);
1530 
1531   return StringSwitch<const char *>(buf)
1532       .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1533       .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1534       .Case("TMS390Z55",
1535             "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1536       .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1537       .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1538       .Case("RT623", "hypersparc")   // Ross hyperSPARC
1539       .Case("RT625", "hypersparc")
1540       .Case("RT626", "hypersparc")
1541       .Case("UltraSPARC-I", "ultrasparc")
1542       .Case("UltraSPARC-II", "ultrasparc")
1543       .Case("UltraSPARC-IIe", "ultrasparc")
1544       .Case("UltraSPARC-IIi", "ultrasparc")
1545       .Case("SPARC64-III", "ultrasparc")
1546       .Case("SPARC64-IV", "ultrasparc")
1547       .Case("UltraSPARC-III", "ultrasparc3")
1548       .Case("UltraSPARC-III+", "ultrasparc3")
1549       .Case("UltraSPARC-IIIi", "ultrasparc3")
1550       .Case("UltraSPARC-IIIi+", "ultrasparc3")
1551       .Case("UltraSPARC-IV", "ultrasparc3")
1552       .Case("UltraSPARC-IV+", "ultrasparc3")
1553       .Case("SPARC64-V", "ultrasparc3")
1554       .Case("SPARC64-VI", "ultrasparc3")
1555       .Case("SPARC64-VII", "ultrasparc3")
1556       .Case("UltraSPARC-T1", "niagara")
1557       .Case("UltraSPARC-T2", "niagara2")
1558       .Case("UltraSPARC-T2", "niagara2")
1559       .Case("UltraSPARC-T2+", "niagara2")
1560       .Case("SPARC-T3", "niagara3")
1561       .Case("SPARC-T4", "niagara4")
1562       .Case("SPARC-T5", "niagara4")
1563       // niagara7/m8 not supported by LLVM yet.
1564       .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1565       .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1566       .Case("SPARC-M8", "niagara4" /* "m8" */)
1567       .Default("generic");
1568 #else
1569   return "generic";
1570 #endif
1571 }
1572 #else
getHostCPUName()1573 StringRef sys::getHostCPUName() { return "generic"; }
1574 namespace llvm {
1575 namespace sys {
1576 namespace detail {
1577 namespace x86 {
1578 
getVendorSignature(unsigned * MaxLeaf)1579 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1580   return VendorSignatures::UNKNOWN;
1581 }
1582 
1583 } // namespace x86
1584 } // namespace detail
1585 } // namespace sys
1586 } // namespace llvm
1587 #endif
1588 
1589 #if defined(__i386__) || defined(_M_IX86) || \
1590     defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1591 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1592   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1593   unsigned MaxLevel;
1594 
1595   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1596     return false;
1597 
1598   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1599 
1600   Features["cx8"]    = (EDX >>  8) & 1;
1601   Features["cmov"]   = (EDX >> 15) & 1;
1602   Features["mmx"]    = (EDX >> 23) & 1;
1603   Features["fxsr"]   = (EDX >> 24) & 1;
1604   Features["sse"]    = (EDX >> 25) & 1;
1605   Features["sse2"]   = (EDX >> 26) & 1;
1606 
1607   Features["sse3"]   = (ECX >>  0) & 1;
1608   Features["pclmul"] = (ECX >>  1) & 1;
1609   Features["ssse3"]  = (ECX >>  9) & 1;
1610   Features["cx16"]   = (ECX >> 13) & 1;
1611   Features["sse4.1"] = (ECX >> 19) & 1;
1612   Features["sse4.2"] = (ECX >> 20) & 1;
1613   Features["crc32"]  = Features["sse4.2"];
1614   Features["movbe"]  = (ECX >> 22) & 1;
1615   Features["popcnt"] = (ECX >> 23) & 1;
1616   Features["aes"]    = (ECX >> 25) & 1;
1617   Features["rdrnd"]  = (ECX >> 30) & 1;
1618 
1619   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1620   // indicates that the AVX registers will be saved and restored on context
1621   // switch, then we have full AVX support.
1622   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1623   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1624 #if defined(__APPLE__)
1625   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1626   // save the AVX512 context if we use AVX512 instructions, even if the bit is
1627   // not set right now.
1628   bool HasAVX512Save = true;
1629 #else
1630   // AVX512 requires additional context to be saved by the OS.
1631   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1632 #endif
1633   // AMX requires additional context to be saved by the OS.
1634   const unsigned AMXBits = (1 << 17) | (1 << 18);
1635   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1636 
1637   Features["avx"]   = HasAVXSave;
1638   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1639   // Only enable XSAVE if OS has enabled support for saving YMM state.
1640   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1641   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1642 
1643   unsigned MaxExtLevel;
1644   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1645 
1646   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1647                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1648   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1649   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1650   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1651   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1652   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1653   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1654   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1655   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1656   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1657 
1658   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1659 
1660   // Miscellaneous memory related features, detected by
1661   // using the 0x80000008 leaf of the CPUID instruction
1662   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1663                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1664   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1665   Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1666   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1667 
1668   bool HasLeaf7 =
1669       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1670 
1671   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1672   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1673   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1674   // AVX2 is only supported if we have the OS save support from AVX.
1675   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1676   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1677   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1678   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1679   // AVX512 is only supported if the OS supports the context save for it.
1680   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1681   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1682   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1683   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1684   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1685   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1686   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1687   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1688   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1689   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1690   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1691   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1692   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1693 
1694   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1695   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1696   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1697   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1698   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1699   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1700   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1701   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1702   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1703   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1704   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1705   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1706   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1707   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1708   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1709   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1710   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1711   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1712 
1713   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1714   Features["avx512vp2intersect"] =
1715       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1716   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1717   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // Two CPUID leaves carry information associated with the pconfig
  // instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1728   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1729   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1730   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1731   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1732   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1733   bool HasLeaf7Subleaf1 =
1734       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1735   Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1736   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1737   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1738   Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1739   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1740   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1741   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1742   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1743   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1744   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1745 
1746   bool HasLeafD = MaxLevel >= 0xd &&
1747                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1748 
1749   // Only enable XSAVE if OS has enabled support for saving YMM state.
1750   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1751   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1752   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1753 
1754   bool HasLeaf14 = MaxLevel >= 0x14 &&
1755                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1756 
1757   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1758 
1759   bool HasLeaf19 =
1760       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1761   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1762 
1763   return true;
1764 }
1765 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1766 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1767   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1768   if (!P)
1769     return false;
1770 
1771   SmallVector<StringRef, 32> Lines;
1772   P->getBuffer().split(Lines, "\n");
1773 
1774   SmallVector<StringRef, 32> CPUFeatures;
1775 
1776   // Look for the CPU features.
1777   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1778     if (Lines[I].startswith("Features")) {
1779       Lines[I].split(CPUFeatures, ' ');
1780       break;
1781     }
1782 
1783 #if defined(__aarch64__)
1784   // Keep track of which crypto features we have seen
1785   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1786   uint32_t crypto = 0;
1787 #endif
1788 
1789   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1790     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1791 #if defined(__aarch64__)
1792                                    .Case("asimd", "neon")
1793                                    .Case("fp", "fp-armv8")
1794                                    .Case("crc32", "crc")
1795                                    .Case("atomics", "lse")
1796                                    .Case("sve", "sve")
1797                                    .Case("sve2", "sve2")
1798 #else
1799                                    .Case("half", "fp16")
1800                                    .Case("neon", "neon")
1801                                    .Case("vfpv3", "vfp3")
1802                                    .Case("vfpv3d16", "vfp3d16")
1803                                    .Case("vfpv4", "vfp4")
1804                                    .Case("idiva", "hwdiv-arm")
1805                                    .Case("idivt", "hwdiv")
1806 #endif
1807                                    .Default("");
1808 
1809 #if defined(__aarch64__)
1810     // We need to check crypto separately since we need all of the crypto
1811     // extensions to enable the subtarget feature
1812     if (CPUFeatures[I] == "aes")
1813       crypto |= CAP_AES;
1814     else if (CPUFeatures[I] == "pmull")
1815       crypto |= CAP_PMULL;
1816     else if (CPUFeatures[I] == "sha1")
1817       crypto |= CAP_SHA1;
1818     else if (CPUFeatures[I] == "sha2")
1819       crypto |= CAP_SHA2;
1820 #endif
1821 
1822     if (LLVMFeatureStr != "")
1823       Features[LLVMFeatureStr] = true;
1824   }
1825 
1826 #if defined(__aarch64__)
1827   // If we have all crypto bits we can add the feature
1828   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1829     Features["crypto"] = true;
1830 #endif
1831 
1832   return true;
1833 }
1834 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1835 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1836   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1837     Features["neon"] = true;
1838   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1839     Features["crc"] = true;
1840   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1841     Features["crypto"] = true;
1842 
1843   return true;
1844 }
1845 #else
getHostCPUFeatures(StringMap<bool> & Features)1846 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1847 #endif
1848 
getProcessTriple()1849 std::string sys::getProcessTriple() {
1850   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1851   Triple PT(Triple::normalize(TargetTripleString));
1852 
1853   if (sizeof(void *) == 8 && PT.isArch32Bit())
1854     PT = PT.get64BitArchVariant();
1855   if (sizeof(void *) == 4 && PT.isArch64Bit())
1856     PT = PT.get32BitArchVariant();
1857 
1858   return PT.str();
1859 }
1860 
printDefaultTargetAndDetectedCPU(raw_ostream & OS)1861 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
1862 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
1863   std::string CPU = std::string(sys::getHostCPUName());
1864   if (CPU == "generic")
1865     CPU = "(unknown)";
1866   OS << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
1867      << "  Host CPU: " << CPU << '\n';
1868 #endif
1869 }
1870