1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53 
54 #define DEBUG_TYPE "host-detection"
55 
56 //===----------------------------------------------------------------------===//
57 //
58 //  Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61 
62 using namespace llvm;
63 
64 static std::unique_ptr<llvm::MemoryBuffer>
65     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68   if (std::error_code EC = Text.getError()) {
69     llvm::errs() << "Can't read "
70                  << "/proc/cpuinfo: " << EC.message() << "\n";
71     return nullptr;
72   }
73   return std::move(*Text);
74 }
75 
76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78   // and so we must use an operating-system interface to determine the current
79   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80   const char *generic = "generic";
81 
82   // The cpu line is second (after the 'processor: 0' line), so if this
83   // buffer is too small then something has changed (or is wrong).
84   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86 
87   StringRef::const_iterator CIP = CPUInfoStart;
88 
89   StringRef::const_iterator CPUStart = nullptr;
90   size_t CPULen = 0;
91 
92   // We need to find the first line which starts with cpu, spaces, and a colon.
93   // After the colon, there may be some additional spaces and then the cpu type.
94   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95     if (CIP < CPUInfoEnd && *CIP == '\n')
96       ++CIP;
97 
98     if (CIP < CPUInfoEnd && *CIP == 'c') {
99       ++CIP;
100       if (CIP < CPUInfoEnd && *CIP == 'p') {
101         ++CIP;
102         if (CIP < CPUInfoEnd && *CIP == 'u') {
103           ++CIP;
104           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105             ++CIP;
106 
107           if (CIP < CPUInfoEnd && *CIP == ':') {
108             ++CIP;
109             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110               ++CIP;
111 
112             if (CIP < CPUInfoEnd) {
113               CPUStart = CIP;
114               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                           *CIP != ',' && *CIP != '\n'))
116                 ++CIP;
117               CPULen = CIP - CPUStart;
118             }
119           }
120         }
121       }
122     }
123 
124     if (CPUStart == nullptr)
125       while (CIP < CPUInfoEnd && *CIP != '\n')
126         ++CIP;
127   }
128 
129   if (CPUStart == nullptr)
130     return generic;
131 
132   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133       .Case("604e", "604e")
134       .Case("604", "604")
135       .Case("7400", "7400")
136       .Case("7410", "7400")
137       .Case("7447", "7400")
138       .Case("7455", "7450")
139       .Case("G4", "g4")
140       .Case("POWER4", "970")
141       .Case("PPC970FX", "970")
142       .Case("PPC970MP", "970")
143       .Case("G5", "g5")
144       .Case("POWER5", "g5")
145       .Case("A2", "a2")
146       .Case("POWER6", "pwr6")
147       .Case("POWER7", "pwr7")
148       .Case("POWER8", "pwr8")
149       .Case("POWER8E", "pwr8")
150       .Case("POWER8NVL", "pwr8")
151       .Case("POWER9", "pwr9")
152       .Case("POWER10", "pwr10")
153       // FIXME: If we get a simulator or machine with the capabilities of
154       // mcpu=future, we should revisit this and add the name reported by the
155       // simulator/machine.
156       .Default(generic);
157 }
158 
159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160   // The cpuid register on arm is not accessible from user space. On Linux,
161   // it is exposed through the /proc/cpuinfo file.
162 
163   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164   // in all cases.
165   SmallVector<StringRef, 32> Lines;
166   ProcCpuinfoContent.split(Lines, "\n");
167 
168   // Look for the CPU implementer line.
169   StringRef Implementer;
170   StringRef Hardware;
171   StringRef Part;
172   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173     if (Lines[I].startswith("CPU implementer"))
174       Implementer = Lines[I].substr(15).ltrim("\t :");
175     if (Lines[I].startswith("Hardware"))
176       Hardware = Lines[I].substr(8).ltrim("\t :");
177     if (Lines[I].startswith("CPU part"))
178       Part = Lines[I].substr(8).ltrim("\t :");
179   }
180 
181   if (Implementer == "0x41") { // ARM Ltd.
182     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
185       return "cortex-a53";
186 
187 
188     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189     // values correspond to the "Part number" in the CP15/c0 register. The
190     // contents are specified in the various processor manuals.
191     // This corresponds to the Main ID Register in Technical Reference Manuals.
192     // and is used in programs like sys-utils
193     return StringSwitch<const char *>(Part)
194         .Case("0x926", "arm926ej-s")
195         .Case("0xb02", "mpcore")
196         .Case("0xb36", "arm1136j-s")
197         .Case("0xb56", "arm1156t2-s")
198         .Case("0xb76", "arm1176jz-s")
199         .Case("0xc08", "cortex-a8")
200         .Case("0xc09", "cortex-a9")
201         .Case("0xc0f", "cortex-a15")
202         .Case("0xc20", "cortex-m0")
203         .Case("0xc23", "cortex-m3")
204         .Case("0xc24", "cortex-m4")
205         .Case("0xd22", "cortex-m55")
206         .Case("0xd02", "cortex-a34")
207         .Case("0xd04", "cortex-a35")
208         .Case("0xd03", "cortex-a53")
209         .Case("0xd05", "cortex-a55")
210         .Case("0xd46", "cortex-a510")
211         .Case("0xd07", "cortex-a57")
212         .Case("0xd08", "cortex-a72")
213         .Case("0xd09", "cortex-a73")
214         .Case("0xd0a", "cortex-a75")
215         .Case("0xd0b", "cortex-a76")
216         .Case("0xd0d", "cortex-a77")
217         .Case("0xd41", "cortex-a78")
218         .Case("0xd47", "cortex-a710")
219         .Case("0xd4d", "cortex-a715")
220         .Case("0xd44", "cortex-x1")
221         .Case("0xd4c", "cortex-x1c")
222         .Case("0xd48", "cortex-x2")
223         .Case("0xd4e", "cortex-x3")
224         .Case("0xd0c", "neoverse-n1")
225         .Case("0xd49", "neoverse-n2")
226         .Case("0xd40", "neoverse-v1")
227         .Case("0xd4f", "neoverse-v2")
228         .Default("generic");
229   }
230 
231   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
232     return StringSwitch<const char *>(Part)
233       .Case("0x516", "thunderx2t99")
234       .Case("0x0516", "thunderx2t99")
235       .Case("0xaf", "thunderx2t99")
236       .Case("0x0af", "thunderx2t99")
237       .Case("0xa1", "thunderxt88")
238       .Case("0x0a1", "thunderxt88")
239       .Default("generic");
240   }
241 
242   if (Implementer == "0x46") { // Fujitsu Ltd.
243     return StringSwitch<const char *>(Part)
244       .Case("0x001", "a64fx")
245       .Default("generic");
246   }
247 
248   if (Implementer == "0x4e") { // NVIDIA Corporation
249     return StringSwitch<const char *>(Part)
250         .Case("0x004", "carmel")
251         .Default("generic");
252   }
253 
254   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
255     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
256     // values correspond to the "Part number" in the CP15/c0 register. The
257     // contents are specified in the various processor manuals.
258     return StringSwitch<const char *>(Part)
259       .Case("0xd01", "tsv110")
260       .Default("generic");
261 
262   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
263     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
264     // values correspond to the "Part number" in the CP15/c0 register. The
265     // contents are specified in the various processor manuals.
266     return StringSwitch<const char *>(Part)
267         .Case("0x06f", "krait") // APQ8064
268         .Case("0x201", "kryo")
269         .Case("0x205", "kryo")
270         .Case("0x211", "kryo")
271         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
272         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
273         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
274         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
275         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
276         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
277         .Case("0xc00", "falkor")
278         .Case("0xc01", "saphira")
279         .Default("generic");
280   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
281     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
282     // any predictive pattern across variants and parts.
283     unsigned Variant = 0, Part = 0;
284 
285     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
286     // number, corresponding to the Variant bits in the CP15/C0 register.
287     for (auto I : Lines)
288       if (I.consume_front("CPU variant"))
289         I.ltrim("\t :").getAsInteger(0, Variant);
290 
291     // Look for the CPU part line, whose value is a 3 digit hexadecimal
292     // number, corresponding to the PartNum bits in the CP15/C0 register.
293     for (auto I : Lines)
294       if (I.consume_front("CPU part"))
295         I.ltrim("\t :").getAsInteger(0, Part);
296 
297     unsigned Exynos = (Variant << 12) | Part;
298     switch (Exynos) {
299     default:
300       // Default by falling through to Exynos M3.
301       [[fallthrough]];
302     case 0x1002:
303       return "exynos-m3";
304     case 0x1003:
305       return "exynos-m4";
306     }
307   }
308 
309   if (Implementer == "0xc0") { // Ampere Computing
310     return StringSwitch<const char *>(Part)
311         .Case("0xac3", "ampere1")
312         .Case("0xac4", "ampere1a")
313         .Default("generic");
314   }
315 
316   return "generic";
317 }
318 
319 namespace {
320 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
321   switch (Id) {
322     case 2064:  // z900 not supported by LLVM
323     case 2066:
324     case 2084:  // z990 not supported by LLVM
325     case 2086:
326     case 2094:  // z9-109 not supported by LLVM
327     case 2096:
328       return "generic";
329     case 2097:
330     case 2098:
331       return "z10";
332     case 2817:
333     case 2818:
334       return "z196";
335     case 2827:
336     case 2828:
337       return "zEC12";
338     case 2964:
339     case 2965:
340       return HaveVectorSupport? "z13" : "zEC12";
341     case 3906:
342     case 3907:
343       return HaveVectorSupport? "z14" : "zEC12";
344     case 8561:
345     case 8562:
346       return HaveVectorSupport? "z15" : "zEC12";
347     case 3931:
348     case 3932:
349     default:
350       return HaveVectorSupport? "z16" : "zEC12";
351   }
352 }
353 } // end anonymous namespace
354 
355 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
356   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
357 
358   // The "processor 0:" line comes after a fair amount of other information,
359   // including a cache breakdown, but this should be plenty.
360   SmallVector<StringRef, 32> Lines;
361   ProcCpuinfoContent.split(Lines, "\n");
362 
363   // Look for the CPU features.
364   SmallVector<StringRef, 32> CPUFeatures;
365   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
366     if (Lines[I].startswith("features")) {
367       size_t Pos = Lines[I].find(':');
368       if (Pos != StringRef::npos) {
369         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
370         break;
371       }
372     }
373 
374   // We need to check for the presence of vector support independently of
375   // the machine type, since we may only use the vector register set when
376   // supported by the kernel (and hypervisor).
377   bool HaveVectorSupport = false;
378   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
379     if (CPUFeatures[I] == "vx")
380       HaveVectorSupport = true;
381   }
382 
383   // Now check the processor machine type.
384   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
385     if (Lines[I].startswith("processor ")) {
386       size_t Pos = Lines[I].find("machine = ");
387       if (Pos != StringRef::npos) {
388         Pos += sizeof("machine = ") - 1;
389         unsigned int Id;
390         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
391           return getCPUNameFromS390Model(Id, HaveVectorSupport);
392       }
393       break;
394     }
395   }
396 
397   return "generic";
398 }
399 
400 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
401   // There are 24 lines in /proc/cpuinfo
402   SmallVector<StringRef> Lines;
403   ProcCpuinfoContent.split(Lines, "\n");
404 
405   // Look for uarch line to determine cpu name
406   StringRef UArch;
407   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
408     if (Lines[I].startswith("uarch")) {
409       UArch = Lines[I].substr(5).ltrim("\t :");
410       break;
411     }
412   }
413 
414   return StringSwitch<const char *>(UArch)
415       .Case("sifive,u74-mc", "sifive-u74")
416       .Case("sifive,bullet0", "sifive-u74")
417       .Default("generic");
418 }
419 
420 StringRef sys::detail::getHostCPUNameForBPF() {
421 #if !defined(__linux__) || !defined(__x86_64__)
422   return "generic";
423 #else
424   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
425       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
426     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
427       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
428       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
429       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
430       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
431       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
432       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433       /* BPF_EXIT_INSN() */
434       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
435 
436   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
437       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445       /* BPF_EXIT_INSN() */
446       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
447 
448   struct bpf_prog_load_attr {
449     uint32_t prog_type;
450     uint32_t insn_cnt;
451     uint64_t insns;
452     uint64_t license;
453     uint32_t log_level;
454     uint32_t log_size;
455     uint64_t log_buf;
456     uint32_t kern_version;
457     uint32_t prog_flags;
458   } attr = {};
459   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
460   attr.insn_cnt = 5;
461   attr.insns = (uint64_t)v3_insns;
462   attr.license = (uint64_t)"DUMMY";
463 
464   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
465                    sizeof(attr));
466   if (fd >= 0) {
467     close(fd);
468     return "v3";
469   }
470 
471   /* Clear the whole attr in case its content changed by syscall. */
472   memset(&attr, 0, sizeof(attr));
473   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
474   attr.insn_cnt = 5;
475   attr.insns = (uint64_t)v2_insns;
476   attr.license = (uint64_t)"DUMMY";
477   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
478   if (fd >= 0) {
479     close(fd);
480     return "v2";
481   }
482   return "v1";
483 #endif
484 }
485 
486 #if defined(__i386__) || defined(_M_IX86) || \
487     defined(__x86_64__) || defined(_M_X64)
488 
// Reports whether the CPUID instruction may be executed on this host.
//
// The i386 probe below was copied from clang's cpuid.h (__get_cpuid_max). It
// was motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support, so on i386 the presence of CPUID is checked first via the
// corresponding eflags bit. On every other configuration CPUID is assumed to
// be available.
//
// The cpuid.h header itself is not used (its removal was motivated by
// PR30384). Header cpuid.h and method __get_cpuid_max are not used in llvm,
// clang, openmp or test-suite, but are used in external projects e.g.
// libstdcxx.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Flip bit 0x00200000 of EFLAGS and read EFLAGS back: if the value read
  // back equals the original, the bit could not be changed and CPUID is
  // reported as unsupported.
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  return __cpuid_supported != 0;
#else
  return true;
#endif
}
523 
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value the CPUID leaf, passed to the instruction in EAX.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers. They are
/// only written when cpuid was actually executed.
/// \returns false when cpuid was executed, true when this build has no way
/// of executing it.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction; the final xchg restores rbx
  // and leaves cpuid's EBX result in rsi, which is why *rEBX is bound to the
  // "S" (rsi/esi) constraint.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  // GNU-compatible compiler, but neither x86-64 nor i386.
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
561 
562 namespace llvm {
563 namespace sys {
564 namespace detail {
565 namespace x86 {
566 
567 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
568   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
569   if (MaxLeaf == nullptr)
570     MaxLeaf = &EAX;
571   else
572     *MaxLeaf = 0;
573 
574   if (!isCpuIdSupported())
575     return VendorSignatures::UNKNOWN;
576 
577   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
578     return VendorSignatures::UNKNOWN;
579 
580   // "Genu ineI ntel"
581   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
582     return VendorSignatures::GENUINE_INTEL;
583 
584   // "Auth enti cAMD"
585   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
586     return VendorSignatures::AUTHENTIC_AMD;
587 
588   return VendorSignatures::UNKNOWN;
589 }
590 
591 } // namespace x86
592 } // namespace detail
593 } // namespace sys
594 } // namespace llvm
595 
596 using namespace llvm::sys::detail::x86;
597 
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
///
/// \param value the CPUID leaf, passed to the instruction in EAX.
/// \param subleaf the CPUID subleaf, passed to the instruction in ECX.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers. They are
/// only written when cpuid was actually executed.
/// \returns false when cpuid was executed, true when this build has no way
/// of executing it.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction; the final xchg restores rbx
  // and leaves cpuid's EBX result in rsi, which is why *rEBX is bound to the
  // "S" (rsi/esi) constraint.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  // GNU-compatible compiler, but neither x86-64 nor i386.
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC: use the subleaf-aware intrinsic and unpack its result array.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
636 
// Read extended control register 0 (XCR0). Used to detect features such as
// AVX.
//
// On success the low 32 bits of the register are stored in *rEAX and the high
// 32 bits in *rEDX, and false is returned. Returns true (leaving the outputs
// untouched) when this build has no way of reading the register.
// NOTE(review): nothing here checks that the host supports xgetbv; presumably
// callers verify that first -- confirm against the call sites.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX = 0 selects register number 0.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // MSVC: the intrinsic returns the full 64-bit value; split it into halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
654 
/// Decodes the family and model numbers from the EAX value of CPUID leaf 1.
///
/// \param EAX the raw EAX value.
/// \param Family receives the family: bits 8-11, plus the extended family
/// (bits 20-27) when the base family is 0xF.
/// \param Model receives the model: bits 4-7, with the extended model
/// (bits 16-19) as the upper nibble when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;   // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;    // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff;  // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;    // Bits 16 - 19

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  // The extended family ID only applies if the family ID is F.
  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;

  // The extended model ID only applies if the family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
667 
668 static StringRef
669 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
670                                 const unsigned *Features,
671                                 unsigned *Type, unsigned *Subtype) {
672   auto testFeature = [&](unsigned F) {
673     return (Features[F / 32] & (1U << (F % 32))) != 0;
674   };
675 
676   StringRef CPU;
677 
678   switch (Family) {
679   case 3:
680     CPU = "i386";
681     break;
682   case 4:
683     CPU = "i486";
684     break;
685   case 5:
686     if (testFeature(X86::FEATURE_MMX)) {
687       CPU = "pentium-mmx";
688       break;
689     }
690     CPU = "pentium";
691     break;
692   case 6:
693     switch (Model) {
694     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
695                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
696                // mobile processor, Intel Core 2 Extreme processor, Intel
697                // Pentium Dual-Core processor, Intel Xeon processor, model
698                // 0Fh. All processors are manufactured using the 65 nm process.
699     case 0x16: // Intel Celeron processor model 16h. All processors are
700                // manufactured using the 65 nm process
701       CPU = "core2";
702       *Type = X86::INTEL_CORE2;
703       break;
704     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
705                // 17h. All processors are manufactured using the 45 nm process.
706                //
707                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
708     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
709                // the 45 nm process.
710       CPU = "penryn";
711       *Type = X86::INTEL_CORE2;
712       break;
713     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
714                // processors are manufactured using the 45 nm process.
715     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
716                // As found in a Summer 2010 model iMac.
717     case 0x1f:
718     case 0x2e:              // Nehalem EX
719       CPU = "nehalem";
720       *Type = X86::INTEL_COREI7;
721       *Subtype = X86::INTEL_COREI7_NEHALEM;
722       break;
723     case 0x25: // Intel Core i7, laptop version.
724     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
725                // processors are manufactured using the 32 nm process.
726     case 0x2f: // Westmere EX
727       CPU = "westmere";
728       *Type = X86::INTEL_COREI7;
729       *Subtype = X86::INTEL_COREI7_WESTMERE;
730       break;
731     case 0x2a: // Intel Core i7 processor. All processors are manufactured
732                // using the 32 nm process.
733     case 0x2d:
734       CPU = "sandybridge";
735       *Type = X86::INTEL_COREI7;
736       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
737       break;
738     case 0x3a:
739     case 0x3e:              // Ivy Bridge EP
740       CPU = "ivybridge";
741       *Type = X86::INTEL_COREI7;
742       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
743       break;
744 
745     // Haswell:
746     case 0x3c:
747     case 0x3f:
748     case 0x45:
749     case 0x46:
750       CPU = "haswell";
751       *Type = X86::INTEL_COREI7;
752       *Subtype = X86::INTEL_COREI7_HASWELL;
753       break;
754 
755     // Broadwell:
756     case 0x3d:
757     case 0x47:
758     case 0x4f:
759     case 0x56:
760       CPU = "broadwell";
761       *Type = X86::INTEL_COREI7;
762       *Subtype = X86::INTEL_COREI7_BROADWELL;
763       break;
764 
765     // Skylake:
766     case 0x4e:              // Skylake mobile
767     case 0x5e:              // Skylake desktop
768     case 0x8e:              // Kaby Lake mobile
769     case 0x9e:              // Kaby Lake desktop
770     case 0xa5:              // Comet Lake-H/S
771     case 0xa6:              // Comet Lake-U
772       CPU = "skylake";
773       *Type = X86::INTEL_COREI7;
774       *Subtype = X86::INTEL_COREI7_SKYLAKE;
775       break;
776 
777     // Rocketlake:
778     case 0xa7:
779       CPU = "rocketlake";
780       *Type = X86::INTEL_COREI7;
781       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
782       break;
783 
784     // Skylake Xeon:
785     case 0x55:
786       *Type = X86::INTEL_COREI7;
787       if (testFeature(X86::FEATURE_AVX512BF16)) {
788         CPU = "cooperlake";
789         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
790       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
791         CPU = "cascadelake";
792         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
793       } else {
794         CPU = "skylake-avx512";
795         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
796       }
797       break;
798 
799     // Cannonlake:
800     case 0x66:
801       CPU = "cannonlake";
802       *Type = X86::INTEL_COREI7;
803       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
804       break;
805 
806     // Icelake:
807     case 0x7d:
808     case 0x7e:
809       CPU = "icelake-client";
810       *Type = X86::INTEL_COREI7;
811       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
812       break;
813 
814     // Tigerlake:
815     case 0x8c:
816     case 0x8d:
817       CPU = "tigerlake";
818       *Type = X86::INTEL_COREI7;
819       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
820       break;
821 
822     // Alderlake:
823     case 0x97:
824     case 0x9a:
825     // Raptorlake:
826     case 0xb7:
827     // Meteorlake:
828     case 0xaa:
829     case 0xac:
830       CPU = "alderlake";
831       *Type = X86::INTEL_COREI7;
832       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
833       break;
834 
835     // Graniterapids:
836     case 0xad:
837       CPU = "graniterapids";
838       *Type = X86::INTEL_COREI7;
839       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
840       break;
841 
842     // Granite Rapids D:
843     case 0xae:
844       CPU = "graniterapids-d";
845       *Type = X86::INTEL_COREI7;
846       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
847       break;
848 
849     // Icelake Xeon:
850     case 0x6a:
851     case 0x6c:
852       CPU = "icelake-server";
853       *Type = X86::INTEL_COREI7;
854       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
855       break;
856 
857     // Emerald Rapids:
858     case 0xcf:
859     // Sapphire Rapids:
860     case 0x8f:
861       CPU = "sapphirerapids";
862       *Type = X86::INTEL_COREI7;
863       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
864       break;
865 
866     case 0x1c: // Most 45 nm Intel Atom processors
867     case 0x26: // 45 nm Atom Lincroft
868     case 0x27: // 32 nm Atom Medfield
869     case 0x35: // 32 nm Atom Midview
870     case 0x36: // 32 nm Atom Midview
871       CPU = "bonnell";
872       *Type = X86::INTEL_BONNELL;
873       break;
874 
875     // Atom Silvermont codes from the Intel software optimization guide.
876     case 0x37:
877     case 0x4a:
878     case 0x4d:
879     case 0x5a:
880     case 0x5d:
881     case 0x4c: // really airmont
882       CPU = "silvermont";
883       *Type = X86::INTEL_SILVERMONT;
884       break;
885     // Goldmont:
886     case 0x5c: // Apollo Lake
887     case 0x5f: // Denverton
888       CPU = "goldmont";
889       *Type = X86::INTEL_GOLDMONT;
890       break;
891     case 0x7a:
892       CPU = "goldmont-plus";
893       *Type = X86::INTEL_GOLDMONT_PLUS;
894       break;
895     case 0x86:
896       CPU = "tremont";
897       *Type = X86::INTEL_TREMONT;
898       break;
899 
900     // Sierraforest:
901     case 0xaf:
902       CPU = "sierraforest";
903       *Type = X86::INTEL_SIERRAFOREST;
904       break;
905 
906     // Grandridge:
907     case 0xb6:
908       CPU = "grandridge";
909       *Type = X86::INTEL_GRANDRIDGE;
910       break;
911 
912     // Xeon Phi (Knights Landing + Knights Mill):
913     case 0x57:
914       CPU = "knl";
915       *Type = X86::INTEL_KNL;
916       break;
917     case 0x85:
918       CPU = "knm";
919       *Type = X86::INTEL_KNM;
920       break;
921 
922     default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
924       // They're used above to keep the code in sync with compiler-rt.
925       // TODO detect tigerlake host from model
926       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
927         CPU = "tigerlake";
928       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
929         CPU = "icelake-client";
930       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
931         CPU = "cannonlake";
932       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
933         CPU = "cooperlake";
934       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
935         CPU = "cascadelake";
936       } else if (testFeature(X86::FEATURE_AVX512VL)) {
937         CPU = "skylake-avx512";
938       } else if (testFeature(X86::FEATURE_AVX512ER)) {
939         CPU = "knl";
940       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
941         if (testFeature(X86::FEATURE_SHA))
942           CPU = "goldmont";
943         else
944           CPU = "skylake";
945       } else if (testFeature(X86::FEATURE_ADX)) {
946         CPU = "broadwell";
947       } else if (testFeature(X86::FEATURE_AVX2)) {
948         CPU = "haswell";
949       } else if (testFeature(X86::FEATURE_AVX)) {
950         CPU = "sandybridge";
951       } else if (testFeature(X86::FEATURE_SSE4_2)) {
952         if (testFeature(X86::FEATURE_MOVBE))
953           CPU = "silvermont";
954         else
955           CPU = "nehalem";
956       } else if (testFeature(X86::FEATURE_SSE4_1)) {
957         CPU = "penryn";
958       } else if (testFeature(X86::FEATURE_SSSE3)) {
959         if (testFeature(X86::FEATURE_MOVBE))
960           CPU = "bonnell";
961         else
962           CPU = "core2";
963       } else if (testFeature(X86::FEATURE_64BIT)) {
964         CPU = "core2";
965       } else if (testFeature(X86::FEATURE_SSE3)) {
966         CPU = "yonah";
967       } else if (testFeature(X86::FEATURE_SSE2)) {
968         CPU = "pentium-m";
969       } else if (testFeature(X86::FEATURE_SSE)) {
970         CPU = "pentium3";
971       } else if (testFeature(X86::FEATURE_MMX)) {
972         CPU = "pentium2";
973       } else {
974         CPU = "pentiumpro";
975       }
976       break;
977     }
978     break;
979   case 15: {
980     if (testFeature(X86::FEATURE_64BIT)) {
981       CPU = "nocona";
982       break;
983     }
984     if (testFeature(X86::FEATURE_SSE3)) {
985       CPU = "prescott";
986       break;
987     }
988     CPU = "pentium4";
989     break;
990   }
991   default:
992     break; // Unknown.
993   }
994 
995   return CPU;
996 }
997 
998 static StringRef
999 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1000                               const unsigned *Features,
1001                               unsigned *Type, unsigned *Subtype) {
1002   auto testFeature = [&](unsigned F) {
1003     return (Features[F / 32] & (1U << (F % 32))) != 0;
1004   };
1005 
1006   StringRef CPU;
1007 
1008   switch (Family) {
1009   case 4:
1010     CPU = "i486";
1011     break;
1012   case 5:
1013     CPU = "pentium";
1014     switch (Model) {
1015     case 6:
1016     case 7:
1017       CPU = "k6";
1018       break;
1019     case 8:
1020       CPU = "k6-2";
1021       break;
1022     case 9:
1023     case 13:
1024       CPU = "k6-3";
1025       break;
1026     case 10:
1027       CPU = "geode";
1028       break;
1029     }
1030     break;
1031   case 6:
1032     if (testFeature(X86::FEATURE_SSE)) {
1033       CPU = "athlon-xp";
1034       break;
1035     }
1036     CPU = "athlon";
1037     break;
1038   case 15:
1039     if (testFeature(X86::FEATURE_SSE3)) {
1040       CPU = "k8-sse3";
1041       break;
1042     }
1043     CPU = "k8";
1044     break;
1045   case 16:
1046     CPU = "amdfam10";
1047     *Type = X86::AMDFAM10H; // "amdfam10"
1048     switch (Model) {
1049     case 2:
1050       *Subtype = X86::AMDFAM10H_BARCELONA;
1051       break;
1052     case 4:
1053       *Subtype = X86::AMDFAM10H_SHANGHAI;
1054       break;
1055     case 8:
1056       *Subtype = X86::AMDFAM10H_ISTANBUL;
1057       break;
1058     }
1059     break;
1060   case 20:
1061     CPU = "btver1";
1062     *Type = X86::AMD_BTVER1;
1063     break;
1064   case 21:
1065     CPU = "bdver1";
1066     *Type = X86::AMDFAM15H;
1067     if (Model >= 0x60 && Model <= 0x7f) {
1068       CPU = "bdver4";
1069       *Subtype = X86::AMDFAM15H_BDVER4;
1070       break; // 60h-7Fh: Excavator
1071     }
1072     if (Model >= 0x30 && Model <= 0x3f) {
1073       CPU = "bdver3";
1074       *Subtype = X86::AMDFAM15H_BDVER3;
1075       break; // 30h-3Fh: Steamroller
1076     }
1077     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1078       CPU = "bdver2";
1079       *Subtype = X86::AMDFAM15H_BDVER2;
1080       break; // 02h, 10h-1Fh: Piledriver
1081     }
1082     if (Model <= 0x0f) {
1083       *Subtype = X86::AMDFAM15H_BDVER1;
1084       break; // 00h-0Fh: Bulldozer
1085     }
1086     break;
1087   case 22:
1088     CPU = "btver2";
1089     *Type = X86::AMD_BTVER2;
1090     break;
1091   case 23:
1092     CPU = "znver1";
1093     *Type = X86::AMDFAM17H;
1094     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1095       CPU = "znver2";
1096       *Subtype = X86::AMDFAM17H_ZNVER2;
1097       break; // 30h-3fh, 71h: Zen2
1098     }
1099     if (Model <= 0x0f) {
1100       *Subtype = X86::AMDFAM17H_ZNVER1;
1101       break; // 00h-0Fh: Zen1
1102     }
1103     break;
1104   case 25:
1105     CPU = "znver3";
1106     *Type = X86::AMDFAM19H;
1107     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
1108       // Family 19h Models 00h-0Fh - Zen3
1109       // Family 19h Models 20h-2Fh - Zen3
1110       // Family 19h Models 30h-3Fh - Zen3
1111       // Family 19h Models 40h-4Fh - Zen3+
1112       // Family 19h Models 50h-5Fh - Zen3+
1113       *Subtype = X86::AMDFAM19H_ZNVER3;
1114       break;
1115     }
1116     if ((Model >= 0x10 && Model <= 0x1f) ||
1117         (Model >= 0x60 && Model <= 0x74) ||
1118         (Model >= 0x78 && Model <= 0x7b) ||
1119         (Model >= 0xA0 && Model <= 0xAf)) {
1120       CPU = "znver4";
1121       *Subtype = X86::AMDFAM19H_ZNVER4;
1122       break; //  "znver4"
1123     }
1124     break; // family 19h
1125   default:
1126     break; // Unknown AMD CPU.
1127   }
1128 
1129   return CPU;
1130 }
1131 
/// Populate the packed feature bitmap used for CPU-name guessing.
///
/// \param ECX      ECX value from CPUID leaf 1 (as passed by getHostCPUName).
/// \param EDX      EDX value from CPUID leaf 1 (as passed by getHostCPUName).
/// \param MaxLeaf  Highest supported basic CPUID leaf; gates the leaf 7 query.
/// \param Features Bitmap with 32 bits per word, indexed by X86::FEATURE_*
///                 value. Bits are only ever OR-ed in, so the caller is
///                 expected to pass a zero-initialised array.
///
/// Note that ECX/EDX and the local EAX/EBX are reused as scratch registers for
/// each successive CPUID/XGETBV query, so the order of the statements below
/// matters: every block of tests reads the registers written by the query
/// immediately preceding it.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  // Sets bit F in the packed bitmap.
  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  // Leaf 1, EDX feature bits.
  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  // Leaf 1, ECX feature bits.
  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    // The same CPUID bit enables both SSE4.2 and CRC32 here.
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // NOTE: getX86XCR0 overwrites EAX and EDX; it is only called when both AVX
  // bits are set, and its result is negated, i.e. a false return is treated as
  // success.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  // EAX is only read here when HasAVX is true, i.e. after getX86XCR0 wrote it.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(X86::FEATURE_AVX);

  // Query leaf 7 subleaf 0; from here on EAX/EBX/ECX/EDX hold its results.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 subleaf 0, EBX feature bits. AVX2 additionally requires HasAVX and
  // the AVX512 features additionally require HasAVX512Save.
  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(X86::FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(X86::FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(X86::FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 19) & 1))
    setFeature(X86::FEATURE_ADX);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 23) & 1))
    setFeature(X86::FEATURE_CLFLUSHOPT);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 29) & 1))
    setFeature(X86::FEATURE_SHA);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VL);

  // Leaf 7 subleaf 0, ECX feature bits.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(X86::FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(X86::FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);

  // Leaf 7 subleaf 0, EDX feature bits.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);

  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  // return all 0s for invalid subleaves so check the limit.
  bool HasLeaf7Subleaf1 =
      HasLeaf7 && EAX >= 1 &&
      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BF16);

  // Extended leaves: 0x80000000 reports the highest extended leaf in its EAX
  // output (stored in MaxExtLevel). The return value of this query is not
  // checked.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  // Extended leaf 0x80000001 feature bits.
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(X86::FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(X86::FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(X86::FEATURE_FMA4);

  if (HasExtLeaf1 && ((EDX >> 29) & 1))
    setFeature(X86::FEATURE_64BIT);
}
1267 
1268 StringRef sys::getHostCPUName() {
1269   unsigned MaxLeaf = 0;
1270   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1271   if (Vendor == VendorSignatures::UNKNOWN)
1272     return "generic";
1273 
1274   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1275   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1276 
1277   unsigned Family = 0, Model = 0;
1278   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1279   detectX86FamilyModel(EAX, &Family, &Model);
1280   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1281 
1282   // These aren't consumed in this file, but we try to keep some source code the
1283   // same or similar to compiler-rt.
1284   unsigned Type = 0;
1285   unsigned Subtype = 0;
1286 
1287   StringRef CPU;
1288 
1289   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1290     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1291                                           &Subtype);
1292   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1293     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1294                                         &Subtype);
1295   }
1296 
1297   if (!CPU.empty())
1298     return CPU;
1299 
1300   return "generic";
1301 }
1302 
1303 #elif defined(__APPLE__) && defined(__powerpc__)
1304 StringRef sys::getHostCPUName() {
1305   host_basic_info_data_t hostInfo;
1306   mach_msg_type_number_t infoCount;
1307 
1308   infoCount = HOST_BASIC_INFO_COUNT;
1309   mach_port_t hostPort = mach_host_self();
1310   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1311             &infoCount);
1312   mach_port_deallocate(mach_task_self(), hostPort);
1313 
1314   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1315     return "generic";
1316 
1317   switch (hostInfo.cpu_subtype) {
1318   case CPU_SUBTYPE_POWERPC_601:
1319     return "601";
1320   case CPU_SUBTYPE_POWERPC_602:
1321     return "602";
1322   case CPU_SUBTYPE_POWERPC_603:
1323     return "603";
1324   case CPU_SUBTYPE_POWERPC_603e:
1325     return "603e";
1326   case CPU_SUBTYPE_POWERPC_603ev:
1327     return "603ev";
1328   case CPU_SUBTYPE_POWERPC_604:
1329     return "604";
1330   case CPU_SUBTYPE_POWERPC_604e:
1331     return "604e";
1332   case CPU_SUBTYPE_POWERPC_620:
1333     return "620";
1334   case CPU_SUBTYPE_POWERPC_750:
1335     return "750";
1336   case CPU_SUBTYPE_POWERPC_7400:
1337     return "7400";
1338   case CPU_SUBTYPE_POWERPC_7450:
1339     return "7450";
1340   case CPU_SUBTYPE_POWERPC_970:
1341     return "970";
1342   default:;
1343   }
1344 
1345   return "generic";
1346 }
1347 #elif defined(__linux__) && defined(__powerpc__)
1348 StringRef sys::getHostCPUName() {
1349   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1350   StringRef Content = P ? P->getBuffer() : "";
1351   return detail::getHostCPUNameForPowerPC(Content);
1352 }
1353 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1354 StringRef sys::getHostCPUName() {
1355   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1356   StringRef Content = P ? P->getBuffer() : "";
1357   return detail::getHostCPUNameForARM(Content);
1358 }
1359 #elif defined(__linux__) && defined(__s390x__)
1360 StringRef sys::getHostCPUName() {
1361   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1362   StringRef Content = P ? P->getBuffer() : "";
1363   return detail::getHostCPUNameForS390x(Content);
1364 }
1365 #elif defined(__MVS__)
1366 StringRef sys::getHostCPUName() {
1367   // Get pointer to Communications Vector Table (CVT).
1368   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1369   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1370   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1371   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1372   // of address.
1373   int ReadValue = *StartToCVTOffset;
1374   // Explicitly clear the high order bit.
1375   ReadValue = (ReadValue & 0x7FFFFFFF);
1376   char *CVT = reinterpret_cast<char *>(ReadValue);
1377   // The model number is located in the CVT prefix at offset -6 and stored as
1378   // signless packed decimal.
1379   uint16_t Id = *(uint16_t *)&CVT[-6];
1380   // Convert number to integer.
1381   Id = decodePackedBCD<uint16_t>(Id, false);
1382   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1383   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1384   // extension can only be used if bit CVTVEF is on.
1385   bool HaveVectorSupport = CVT[244] & 0x80;
1386   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1387 }
1388 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
// CPU family identifiers that the value of the "hw.cpufamily" sysctl is
// compared against when detecting the host CPU on Darwin ARM/AArch64.
#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
#define CPUFAMILY_ARM_CYCLONE 0x37a09642
#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1398 
1399 StringRef sys::getHostCPUName() {
1400   uint32_t Family;
1401   size_t Length = sizeof(Family);
1402   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1403 
1404   switch (Family) {
1405   case CPUFAMILY_ARM_SWIFT:
1406     return "swift";
1407   case CPUFAMILY_ARM_CYCLONE:
1408     return "apple-a7";
1409   case CPUFAMILY_ARM_TYPHOON:
1410     return "apple-a8";
1411   case CPUFAMILY_ARM_TWISTER:
1412     return "apple-a9";
1413   case CPUFAMILY_ARM_HURRICANE:
1414     return "apple-a10";
1415   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1416     return "apple-a11";
1417   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1418     return "apple-a12";
1419   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1420     return "apple-a13";
1421   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1422     return "apple-m1";
1423   default:
1424     // Default to the newest CPU we know about.
1425     return "apple-m1";
1426   }
1427 }
1428 #elif defined(_AIX)
1429 StringRef sys::getHostCPUName() {
1430   switch (_system_configuration.implementation) {
1431   case POWER_4:
1432     if (_system_configuration.version == PV_4_3)
1433       return "970";
1434     return "pwr4";
1435   case POWER_5:
1436     if (_system_configuration.version == PV_5)
1437       return "pwr5";
1438     return "pwr5x";
1439   case POWER_6:
1440     if (_system_configuration.version == PV_6_Compat)
1441       return "pwr6";
1442     return "pwr6x";
1443   case POWER_7:
1444     return "pwr7";
1445   case POWER_8:
1446     return "pwr8";
1447   case POWER_9:
1448     return "pwr9";
1449 // TODO: simplify this once the macro is available in all OS levels.
1450 #ifdef POWER_10
1451   case POWER_10:
1452 #else
1453   case 0x40000:
1454 #endif
1455     return "pwr10";
1456   default:
1457     return "generic";
1458   }
1459 }
1460 #elif defined(__loongarch__)
1461 StringRef sys::getHostCPUName() {
1462   // Use processor id to detect cpu name.
1463   uint32_t processor_id;
1464   __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1465   switch (processor_id & 0xff00) {
1466   case 0xc000: // Loongson 64bit, 4-issue
1467     return "la464";
1468   // TODO: Others.
1469   default:
1470     break;
1471   }
1472   return "generic";
1473 }
1474 #elif defined(__riscv)
1475 StringRef sys::getHostCPUName() {
1476 #if defined(__linux__)
1477   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1478   StringRef Content = P ? P->getBuffer() : "";
1479   return detail::getHostCPUNameForRISCV(Content);
1480 #else
1481 #if __riscv_xlen == 64
1482   return "generic-rv64";
1483 #elif __riscv_xlen == 32
1484   return "generic-rv32";
1485 #else
1486 #error "Unhandled value of __riscv_xlen"
1487 #endif
1488 #endif
1489 }
1490 #elif defined(__sparc__)
1491 #if defined(__linux__)
1492 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1493   SmallVector<StringRef> Lines;
1494   ProcCpuinfoContent.split(Lines, "\n");
1495 
1496   // Look for cpu line to determine cpu name
1497   StringRef Cpu;
1498   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1499     if (Lines[I].startswith("cpu")) {
1500       Cpu = Lines[I].substr(5).ltrim("\t :");
1501       break;
1502     }
1503   }
1504 
1505   return StringSwitch<const char *>(Cpu)
1506       .StartsWith("SuperSparc", "supersparc")
1507       .StartsWith("HyperSparc", "hypersparc")
1508       .StartsWith("SpitFire", "ultrasparc")
1509       .StartsWith("BlackBird", "ultrasparc")
1510       .StartsWith("Sabre", " ultrasparc")
1511       .StartsWith("Hummingbird", "ultrasparc")
1512       .StartsWith("Cheetah", "ultrasparc3")
1513       .StartsWith("Jalapeno", "ultrasparc3")
1514       .StartsWith("Jaguar", "ultrasparc3")
1515       .StartsWith("Panther", "ultrasparc3")
1516       .StartsWith("Serrano", "ultrasparc3")
1517       .StartsWith("UltraSparc T1", "niagara")
1518       .StartsWith("UltraSparc T2", "niagara2")
1519       .StartsWith("UltraSparc T3", "niagara3")
1520       .StartsWith("UltraSparc T4", "niagara4")
1521       .StartsWith("UltraSparc T5", "niagara4")
1522       .StartsWith("LEON", "leon3")
1523       // niagara7/m8 not supported by LLVM yet.
1524       .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1525       .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1526       .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1527       .Default("generic");
1528 }
1529 #endif
1530 
1531 StringRef sys::getHostCPUName() {
1532 #if defined(__linux__)
1533   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1534   StringRef Content = P ? P->getBuffer() : "";
1535   return detail::getHostCPUNameForSPARC(Content);
1536 #elif defined(__sun__) && defined(__svr4__)
1537   char *buf = NULL;
1538   kstat_ctl_t *kc;
1539   kstat_t *ksp;
1540   kstat_named_t *brand = NULL;
1541 
1542   kc = kstat_open();
1543   if (kc != NULL) {
1544     ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1545     if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1546         ksp->ks_type == KSTAT_TYPE_NAMED)
1547       brand =
1548           (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1549     if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1550       buf = KSTAT_NAMED_STR_PTR(brand);
1551   }
1552   kstat_close(kc);
1553 
1554   return StringSwitch<const char *>(buf)
1555       .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1556       .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1557       .Case("TMS390Z55",
1558             "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1559       .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1560       .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1561       .Case("RT623", "hypersparc")   // Ross hyperSPARC
1562       .Case("RT625", "hypersparc")
1563       .Case("RT626", "hypersparc")
1564       .Case("UltraSPARC-I", "ultrasparc")
1565       .Case("UltraSPARC-II", "ultrasparc")
1566       .Case("UltraSPARC-IIe", "ultrasparc")
1567       .Case("UltraSPARC-IIi", "ultrasparc")
1568       .Case("SPARC64-III", "ultrasparc")
1569       .Case("SPARC64-IV", "ultrasparc")
1570       .Case("UltraSPARC-III", "ultrasparc3")
1571       .Case("UltraSPARC-III+", "ultrasparc3")
1572       .Case("UltraSPARC-IIIi", "ultrasparc3")
1573       .Case("UltraSPARC-IIIi+", "ultrasparc3")
1574       .Case("UltraSPARC-IV", "ultrasparc3")
1575       .Case("UltraSPARC-IV+", "ultrasparc3")
1576       .Case("SPARC64-V", "ultrasparc3")
1577       .Case("SPARC64-VI", "ultrasparc3")
1578       .Case("SPARC64-VII", "ultrasparc3")
1579       .Case("UltraSPARC-T1", "niagara")
1580       .Case("UltraSPARC-T2", "niagara2")
1581       .Case("UltraSPARC-T2", "niagara2")
1582       .Case("UltraSPARC-T2+", "niagara2")
1583       .Case("SPARC-T3", "niagara3")
1584       .Case("SPARC-T4", "niagara4")
1585       .Case("SPARC-T5", "niagara4")
1586       // niagara7/m8 not supported by LLVM yet.
1587       .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1588       .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1589       .Case("SPARC-M8", "niagara4" /* "m8" */)
1590       .Default("generic");
1591 #else
1592   return "generic";
1593 #endif
1594 }
1595 #else
1596 StringRef sys::getHostCPUName() { return "generic"; }
1597 namespace llvm {
1598 namespace sys {
1599 namespace detail {
1600 namespace x86 {
1601 
1602 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1603   return VendorSignatures::UNKNOWN;
1604 }
1605 
1606 } // namespace x86
1607 } // namespace detail
1608 } // namespace sys
1609 } // namespace llvm
1610 #endif
1611 
1612 #if defined(__i386__) || defined(_M_IX86) || \
1613     defined(__x86_64__) || defined(_M_X64)
1614 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1615   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1616   unsigned MaxLevel;
1617 
1618   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1619     return false;
1620 
1621   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1622 
1623   Features["cx8"]    = (EDX >>  8) & 1;
1624   Features["cmov"]   = (EDX >> 15) & 1;
1625   Features["mmx"]    = (EDX >> 23) & 1;
1626   Features["fxsr"]   = (EDX >> 24) & 1;
1627   Features["sse"]    = (EDX >> 25) & 1;
1628   Features["sse2"]   = (EDX >> 26) & 1;
1629 
1630   Features["sse3"]   = (ECX >>  0) & 1;
1631   Features["pclmul"] = (ECX >>  1) & 1;
1632   Features["ssse3"]  = (ECX >>  9) & 1;
1633   Features["cx16"]   = (ECX >> 13) & 1;
1634   Features["sse4.1"] = (ECX >> 19) & 1;
1635   Features["sse4.2"] = (ECX >> 20) & 1;
1636   Features["crc32"]  = Features["sse4.2"];
1637   Features["movbe"]  = (ECX >> 22) & 1;
1638   Features["popcnt"] = (ECX >> 23) & 1;
1639   Features["aes"]    = (ECX >> 25) & 1;
1640   Features["rdrnd"]  = (ECX >> 30) & 1;
1641 
1642   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1643   // indicates that the AVX registers will be saved and restored on context
1644   // switch, then we have full AVX support.
1645   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1646   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1647 #if defined(__APPLE__)
1648   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1649   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1650   // set right now.
1651   bool HasAVX512Save = true;
1652 #else
1653   // AVX512 requires additional context to be saved by the OS.
1654   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1655 #endif
1656   // AMX requires additional context to be saved by the OS.
1657   const unsigned AMXBits = (1 << 17) | (1 << 18);
1658   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1659 
1660   Features["avx"]   = HasAVXSave;
1661   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1662   // Only enable XSAVE if OS has enabled support for saving YMM state.
1663   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1664   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1665 
1666   unsigned MaxExtLevel;
1667   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1668 
1669   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1670                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1671   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1672   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1673   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1674   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1675   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1676   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1677   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1678   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1679   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1680 
1681   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1682 
1683   // Miscellaneous memory related features, detected by
1684   // using the 0x80000008 leaf of the CPUID instruction
1685   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1686                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1687   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1688   Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1689   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1690 
1691   bool HasLeaf7 =
1692       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1693 
1694   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1695   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1696   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1697   // AVX2 is only supported if we have the OS save support from AVX.
1698   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1699   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1700   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1701   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1702   // AVX512 is only supported if the OS supports the context save for it.
1703   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1704   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1705   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1706   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1707   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1708   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1709   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1710   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1711   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1712   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1713   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1714   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1715   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1716 
1717   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1718   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1719   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1720   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1721   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1722   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1723   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1724   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1725   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1726   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1727   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1728   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1729   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1730   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1731   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1732   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1733   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1734   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1735 
1736   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1737   Features["avx512vp2intersect"] =
1738       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1739   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1740   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which provide information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1751   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1752   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1753   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1754   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1755   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1756   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1757   // return all 0s for invalid subleaves so check the limit.
1758   bool HasLeaf7Subleaf1 =
1759       HasLeaf7 && EAX >= 1 &&
1760       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1761   Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1762   Features["sm3"]        = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1763   Features["sm4"]        = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1764   Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1765   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1766   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1767   Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1768   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1769   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1770   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1771   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1772   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1773   Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1774   Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1775   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1776 
1777   bool HasLeafD = MaxLevel >= 0xd &&
1778                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1779 
1780   // Only enable XSAVE if OS has enabled support for saving YMM state.
1781   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1782   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1783   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1784 
1785   bool HasLeaf14 = MaxLevel >= 0x14 &&
1786                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1787 
1788   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1789 
1790   bool HasLeaf19 =
1791       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1792   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1793 
1794   return true;
1795 }
1796 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1797 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1798   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1799   if (!P)
1800     return false;
1801 
1802   SmallVector<StringRef, 32> Lines;
1803   P->getBuffer().split(Lines, "\n");
1804 
1805   SmallVector<StringRef, 32> CPUFeatures;
1806 
1807   // Look for the CPU features.
1808   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1809     if (Lines[I].startswith("Features")) {
1810       Lines[I].split(CPUFeatures, ' ');
1811       break;
1812     }
1813 
1814 #if defined(__aarch64__)
1815   // Keep track of which crypto features we have seen
1816   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1817   uint32_t crypto = 0;
1818 #endif
1819 
1820   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1821     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1822 #if defined(__aarch64__)
1823                                    .Case("asimd", "neon")
1824                                    .Case("fp", "fp-armv8")
1825                                    .Case("crc32", "crc")
1826                                    .Case("atomics", "lse")
1827                                    .Case("sve", "sve")
1828                                    .Case("sve2", "sve2")
1829 #else
1830                                    .Case("half", "fp16")
1831                                    .Case("neon", "neon")
1832                                    .Case("vfpv3", "vfp3")
1833                                    .Case("vfpv3d16", "vfp3d16")
1834                                    .Case("vfpv4", "vfp4")
1835                                    .Case("idiva", "hwdiv-arm")
1836                                    .Case("idivt", "hwdiv")
1837 #endif
1838                                    .Default("");
1839 
1840 #if defined(__aarch64__)
1841     // We need to check crypto separately since we need all of the crypto
1842     // extensions to enable the subtarget feature
1843     if (CPUFeatures[I] == "aes")
1844       crypto |= CAP_AES;
1845     else if (CPUFeatures[I] == "pmull")
1846       crypto |= CAP_PMULL;
1847     else if (CPUFeatures[I] == "sha1")
1848       crypto |= CAP_SHA1;
1849     else if (CPUFeatures[I] == "sha2")
1850       crypto |= CAP_SHA2;
1851 #endif
1852 
1853     if (LLVMFeatureStr != "")
1854       Features[LLVMFeatureStr] = true;
1855   }
1856 
1857 #if defined(__aarch64__)
1858   // If we have all crypto bits we can add the feature
1859   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1860     Features["crypto"] = true;
1861 #endif
1862 
1863   return true;
1864 }
1865 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1866 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1867   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1868     Features["neon"] = true;
1869   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1870     Features["crc"] = true;
1871   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1872     Features["crypto"] = true;
1873 
1874   return true;
1875 }
1876 #elif defined(__linux__) && defined(__loongarch__)
1877 #include <sys/auxv.h>
1878 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1879   unsigned long hwcap = getauxval(AT_HWCAP);
1880   bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1881   uint32_t cpucfg2 = 0x2;
1882   __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1883 
1884   Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1885   Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1886 
1887   Features["lsx"] = hwcap & (1UL << 4);  // HWCAP_LOONGARCH_LSX
1888   Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1889   Features["lvz"] = hwcap & (1UL << 9);  // HWCAP_LOONGARCH_LVZ
1890 
1891   return true;
1892 }
1893 #else
1894 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1895 #endif
1896 
1897 #if __APPLE__
1898 /// \returns the \p triple, but with the Host's arch spliced in.
1899 static Triple withHostArch(Triple T) {
1900 #if defined(__arm__)
1901   T.setArch(Triple::arm);
1902   T.setArchName("arm");
1903 #elif defined(__arm64e__)
1904   T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
1905   T.setArchName("arm64e");
1906 #elif defined(__aarch64__)
1907   T.setArch(Triple::aarch64);
1908   T.setArchName("arm64");
1909 #elif defined(__x86_64h__)
1910   T.setArch(Triple::x86_64);
1911   T.setArchName("x86_64h");
1912 #elif defined(__x86_64__)
1913   T.setArch(Triple::x86_64);
1914   T.setArchName("x86_64");
1915 #elif defined(__powerpc__)
1916   T.setArch(Triple::ppc);
1917   T.setArchName("powerpc");
1918 #else
1919 #  error "Unimplemented host arch fixup"
1920 #endif
1921   return T;
1922 }
1923 #endif
1924 
1925 std::string sys::getProcessTriple() {
1926   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1927   Triple PT(Triple::normalize(TargetTripleString));
1928 
1929 #if __APPLE__
1930   /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
1931   /// the slices. This fixes that up.
1932   PT = withHostArch(PT);
1933 #endif
1934 
1935   if (sizeof(void *) == 8 && PT.isArch32Bit())
1936     PT = PT.get64BitArchVariant();
1937   if (sizeof(void *) == 4 && PT.isArch64Bit())
1938     PT = PT.get32BitArchVariant();
1939 
1940   return PT.str();
1941 }
1942 
1943 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
1944 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
1945   std::string CPU = std::string(sys::getHostCPUName());
1946   if (CPU == "generic")
1947     CPU = "(unknown)";
1948   OS << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
1949      << "  Host CPU: " << CPU << '\n';
1950 #endif
1951 }
1952