1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53 
54 #define DEBUG_TYPE "host-detection"
55 
56 //===----------------------------------------------------------------------===//
57 //
58 //  Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61 
62 using namespace llvm;
63 
64 static std::unique_ptr<llvm::MemoryBuffer>
65     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68   if (std::error_code EC = Text.getError()) {
69     llvm::errs() << "Can't read "
70                  << "/proc/cpuinfo: " << EC.message() << "\n";
71     return nullptr;
72   }
73   return std::move(*Text);
74 }
75 
76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78   // and so we must use an operating-system interface to determine the current
79   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80   const char *generic = "generic";
81 
82   // The cpu line is second (after the 'processor: 0' line), so if this
83   // buffer is too small then something has changed (or is wrong).
84   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86 
87   StringRef::const_iterator CIP = CPUInfoStart;
88 
89   StringRef::const_iterator CPUStart = nullptr;
90   size_t CPULen = 0;
91 
92   // We need to find the first line which starts with cpu, spaces, and a colon.
93   // After the colon, there may be some additional spaces and then the cpu type.
94   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95     if (CIP < CPUInfoEnd && *CIP == '\n')
96       ++CIP;
97 
98     if (CIP < CPUInfoEnd && *CIP == 'c') {
99       ++CIP;
100       if (CIP < CPUInfoEnd && *CIP == 'p') {
101         ++CIP;
102         if (CIP < CPUInfoEnd && *CIP == 'u') {
103           ++CIP;
104           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105             ++CIP;
106 
107           if (CIP < CPUInfoEnd && *CIP == ':') {
108             ++CIP;
109             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110               ++CIP;
111 
112             if (CIP < CPUInfoEnd) {
113               CPUStart = CIP;
114               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                           *CIP != ',' && *CIP != '\n'))
116                 ++CIP;
117               CPULen = CIP - CPUStart;
118             }
119           }
120         }
121       }
122     }
123 
124     if (CPUStart == nullptr)
125       while (CIP < CPUInfoEnd && *CIP != '\n')
126         ++CIP;
127   }
128 
129   if (CPUStart == nullptr)
130     return generic;
131 
132   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133       .Case("604e", "604e")
134       .Case("604", "604")
135       .Case("7400", "7400")
136       .Case("7410", "7400")
137       .Case("7447", "7400")
138       .Case("7455", "7450")
139       .Case("G4", "g4")
140       .Case("POWER4", "970")
141       .Case("PPC970FX", "970")
142       .Case("PPC970MP", "970")
143       .Case("G5", "g5")
144       .Case("POWER5", "g5")
145       .Case("A2", "a2")
146       .Case("POWER6", "pwr6")
147       .Case("POWER7", "pwr7")
148       .Case("POWER8", "pwr8")
149       .Case("POWER8E", "pwr8")
150       .Case("POWER8NVL", "pwr8")
151       .Case("POWER9", "pwr9")
152       .Case("POWER10", "pwr10")
153       // FIXME: If we get a simulator or machine with the capabilities of
154       // mcpu=future, we should revisit this and add the name reported by the
155       // simulator/machine.
156       .Default(generic);
157 }
158 
159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160   // The cpuid register on arm is not accessible from user space. On Linux,
161   // it is exposed through the /proc/cpuinfo file.
162 
163   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164   // in all cases.
165   SmallVector<StringRef, 32> Lines;
166   ProcCpuinfoContent.split(Lines, "\n");
167 
168   // Look for the CPU implementer line.
169   StringRef Implementer;
170   StringRef Hardware;
171   StringRef Part;
172   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173     if (Lines[I].starts_with("CPU implementer"))
174       Implementer = Lines[I].substr(15).ltrim("\t :");
175     if (Lines[I].starts_with("Hardware"))
176       Hardware = Lines[I].substr(8).ltrim("\t :");
177     if (Lines[I].starts_with("CPU part"))
178       Part = Lines[I].substr(8).ltrim("\t :");
179   }
180 
181   if (Implementer == "0x41") { // ARM Ltd.
182     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184     if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
185       return "cortex-a53";
186 
187 
188     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189     // values correspond to the "Part number" in the CP15/c0 register. The
190     // contents are specified in the various processor manuals.
191     // This corresponds to the Main ID Register in Technical Reference Manuals.
192     // and is used in programs like sys-utils
193     return StringSwitch<const char *>(Part)
194         .Case("0x926", "arm926ej-s")
195         .Case("0xb02", "mpcore")
196         .Case("0xb36", "arm1136j-s")
197         .Case("0xb56", "arm1156t2-s")
198         .Case("0xb76", "arm1176jz-s")
199         .Case("0xc08", "cortex-a8")
200         .Case("0xc09", "cortex-a9")
201         .Case("0xc0f", "cortex-a15")
202         .Case("0xc20", "cortex-m0")
203         .Case("0xc23", "cortex-m3")
204         .Case("0xc24", "cortex-m4")
205         .Case("0xd24", "cortex-m52")
206         .Case("0xd22", "cortex-m55")
207         .Case("0xd02", "cortex-a34")
208         .Case("0xd04", "cortex-a35")
209         .Case("0xd03", "cortex-a53")
210         .Case("0xd05", "cortex-a55")
211         .Case("0xd46", "cortex-a510")
212         .Case("0xd80", "cortex-a520")
213         .Case("0xd07", "cortex-a57")
214         .Case("0xd08", "cortex-a72")
215         .Case("0xd09", "cortex-a73")
216         .Case("0xd0a", "cortex-a75")
217         .Case("0xd0b", "cortex-a76")
218         .Case("0xd0d", "cortex-a77")
219         .Case("0xd41", "cortex-a78")
220         .Case("0xd47", "cortex-a710")
221         .Case("0xd4d", "cortex-a715")
222         .Case("0xd81", "cortex-a720")
223         .Case("0xd44", "cortex-x1")
224         .Case("0xd4c", "cortex-x1c")
225         .Case("0xd48", "cortex-x2")
226         .Case("0xd4e", "cortex-x3")
227         .Case("0xd82", "cortex-x4")
228         .Case("0xd0c", "neoverse-n1")
229         .Case("0xd49", "neoverse-n2")
230         .Case("0xd40", "neoverse-v1")
231         .Case("0xd4f", "neoverse-v2")
232         .Default("generic");
233   }
234 
235   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
236     return StringSwitch<const char *>(Part)
237       .Case("0x516", "thunderx2t99")
238       .Case("0x0516", "thunderx2t99")
239       .Case("0xaf", "thunderx2t99")
240       .Case("0x0af", "thunderx2t99")
241       .Case("0xa1", "thunderxt88")
242       .Case("0x0a1", "thunderxt88")
243       .Default("generic");
244   }
245 
246   if (Implementer == "0x46") { // Fujitsu Ltd.
247     return StringSwitch<const char *>(Part)
248       .Case("0x001", "a64fx")
249       .Default("generic");
250   }
251 
252   if (Implementer == "0x4e") { // NVIDIA Corporation
253     return StringSwitch<const char *>(Part)
254         .Case("0x004", "carmel")
255         .Default("generic");
256   }
257 
258   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
259     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
260     // values correspond to the "Part number" in the CP15/c0 register. The
261     // contents are specified in the various processor manuals.
262     return StringSwitch<const char *>(Part)
263       .Case("0xd01", "tsv110")
264       .Default("generic");
265 
266   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
267     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
268     // values correspond to the "Part number" in the CP15/c0 register. The
269     // contents are specified in the various processor manuals.
270     return StringSwitch<const char *>(Part)
271         .Case("0x06f", "krait") // APQ8064
272         .Case("0x201", "kryo")
273         .Case("0x205", "kryo")
274         .Case("0x211", "kryo")
275         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
276         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
277         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
278         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
279         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
280         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
281         .Case("0xc00", "falkor")
282         .Case("0xc01", "saphira")
283         .Default("generic");
284   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
285     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
286     // any predictive pattern across variants and parts.
287     unsigned Variant = 0, Part = 0;
288 
289     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
290     // number, corresponding to the Variant bits in the CP15/C0 register.
291     for (auto I : Lines)
292       if (I.consume_front("CPU variant"))
293         I.ltrim("\t :").getAsInteger(0, Variant);
294 
295     // Look for the CPU part line, whose value is a 3 digit hexadecimal
296     // number, corresponding to the PartNum bits in the CP15/C0 register.
297     for (auto I : Lines)
298       if (I.consume_front("CPU part"))
299         I.ltrim("\t :").getAsInteger(0, Part);
300 
301     unsigned Exynos = (Variant << 12) | Part;
302     switch (Exynos) {
303     default:
304       // Default by falling through to Exynos M3.
305       [[fallthrough]];
306     case 0x1002:
307       return "exynos-m3";
308     case 0x1003:
309       return "exynos-m4";
310     }
311   }
312 
313   if (Implementer == "0xc0") { // Ampere Computing
314     return StringSwitch<const char *>(Part)
315         .Case("0xac3", "ampere1")
316         .Case("0xac4", "ampere1a")
317         .Default("generic");
318   }
319 
320   return "generic";
321 }
322 
323 namespace {
324 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
325   switch (Id) {
326     case 2064:  // z900 not supported by LLVM
327     case 2066:
328     case 2084:  // z990 not supported by LLVM
329     case 2086:
330     case 2094:  // z9-109 not supported by LLVM
331     case 2096:
332       return "generic";
333     case 2097:
334     case 2098:
335       return "z10";
336     case 2817:
337     case 2818:
338       return "z196";
339     case 2827:
340     case 2828:
341       return "zEC12";
342     case 2964:
343     case 2965:
344       return HaveVectorSupport? "z13" : "zEC12";
345     case 3906:
346     case 3907:
347       return HaveVectorSupport? "z14" : "zEC12";
348     case 8561:
349     case 8562:
350       return HaveVectorSupport? "z15" : "zEC12";
351     case 3931:
352     case 3932:
353     default:
354       return HaveVectorSupport? "z16" : "zEC12";
355   }
356 }
357 } // end anonymous namespace
358 
359 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
360   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
361 
362   // The "processor 0:" line comes after a fair amount of other information,
363   // including a cache breakdown, but this should be plenty.
364   SmallVector<StringRef, 32> Lines;
365   ProcCpuinfoContent.split(Lines, "\n");
366 
367   // Look for the CPU features.
368   SmallVector<StringRef, 32> CPUFeatures;
369   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
370     if (Lines[I].starts_with("features")) {
371       size_t Pos = Lines[I].find(':');
372       if (Pos != StringRef::npos) {
373         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
374         break;
375       }
376     }
377 
378   // We need to check for the presence of vector support independently of
379   // the machine type, since we may only use the vector register set when
380   // supported by the kernel (and hypervisor).
381   bool HaveVectorSupport = false;
382   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
383     if (CPUFeatures[I] == "vx")
384       HaveVectorSupport = true;
385   }
386 
387   // Now check the processor machine type.
388   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
389     if (Lines[I].starts_with("processor ")) {
390       size_t Pos = Lines[I].find("machine = ");
391       if (Pos != StringRef::npos) {
392         Pos += sizeof("machine = ") - 1;
393         unsigned int Id;
394         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
395           return getCPUNameFromS390Model(Id, HaveVectorSupport);
396       }
397       break;
398     }
399   }
400 
401   return "generic";
402 }
403 
404 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
405   // There are 24 lines in /proc/cpuinfo
406   SmallVector<StringRef> Lines;
407   ProcCpuinfoContent.split(Lines, "\n");
408 
409   // Look for uarch line to determine cpu name
410   StringRef UArch;
411   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
412     if (Lines[I].starts_with("uarch")) {
413       UArch = Lines[I].substr(5).ltrim("\t :");
414       break;
415     }
416   }
417 
418   return StringSwitch<const char *>(UArch)
419       .Case("sifive,u74-mc", "sifive-u74")
420       .Case("sifive,bullet0", "sifive-u74")
421       .Default("generic");
422 }
423 
424 StringRef sys::detail::getHostCPUNameForBPF() {
425 #if !defined(__linux__) || !defined(__x86_64__)
426   return "generic";
427 #else
428   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
429       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
430     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
431       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
432       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
433       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
434       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
435       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
436       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
437       /* BPF_EXIT_INSN() */
438       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
439 
440   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
441       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
442     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
443       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
444       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
446       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
447       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
448       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
449       /* BPF_EXIT_INSN() */
450       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
451 
452   struct bpf_prog_load_attr {
453     uint32_t prog_type;
454     uint32_t insn_cnt;
455     uint64_t insns;
456     uint64_t license;
457     uint32_t log_level;
458     uint32_t log_size;
459     uint64_t log_buf;
460     uint32_t kern_version;
461     uint32_t prog_flags;
462   } attr = {};
463   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
464   attr.insn_cnt = 5;
465   attr.insns = (uint64_t)v3_insns;
466   attr.license = (uint64_t)"DUMMY";
467 
468   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
469                    sizeof(attr));
470   if (fd >= 0) {
471     close(fd);
472     return "v3";
473   }
474 
475   /* Clear the whole attr in case its content changed by syscall. */
476   memset(&attr, 0, sizeof(attr));
477   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
478   attr.insn_cnt = 5;
479   attr.insns = (uint64_t)v2_insns;
480   attr.license = (uint64_t)"DUMMY";
481   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
482   if (fd >= 0) {
483     close(fd);
484     return "v2";
485   }
486   return "v1";
487 #endif
488 }
489 
490 #if defined(__i386__) || defined(_M_IX86) || \
491     defined(__x86_64__) || defined(_M_X64)
492 
// Report whether the CPUID instruction can be executed.
//
// The i386 probe was copied from clang's cpuid.h (__get_cpuid_max) and was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support: it tries to toggle bit 0x00200000 of EFLAGS and checks whether the
// change sticks. The cpuid.h header itself is not used (see PR30384); it and
// __get_cpuid_max are not used in llvm, clang, openmp or test-suite, but are
// used in external projects e.g. libstdcxx.
//
// In every configuration other than GCC/Clang targeting i386 the answer is
// unconditionally true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdPresent;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdPresent)
          :
          : "eax", "ecx");
  return CpuIdPresent != 0;
#else
  return true;
#endif
}
527 
/// getX86CpuIDAndInfo - Execute the cpuid instruction for leaf \p value and
/// store the resulting EAX/EBX/ECX/EDX register values through \p rEAX,
/// \p rEBX, \p rECX and \p rEDX.
///
/// No subleaf is supplied: only EAX is loaded before the instruction runs.
///
/// \returns false when cpuid was executed and the outputs were written, and
/// true when cpuid can't be run in this build configuration (in which case
/// the outputs are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually:
  // rbx is stashed in rsi before cpuid, and the exchange afterwards both
  // restores rbx and leaves cpuid's EBX result in rsi (the "=S" output).
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/exchange sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  // GCC/Clang, but neither __x86_64__ nor __i386__ is defined.
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
565 
566 namespace llvm {
567 namespace sys {
568 namespace detail {
569 namespace x86 {
570 
571 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
572   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
573   if (MaxLeaf == nullptr)
574     MaxLeaf = &EAX;
575   else
576     *MaxLeaf = 0;
577 
578   if (!isCpuIdSupported())
579     return VendorSignatures::UNKNOWN;
580 
581   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
582     return VendorSignatures::UNKNOWN;
583 
584   // "Genu ineI ntel"
585   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
586     return VendorSignatures::GENUINE_INTEL;
587 
588   // "Auth enti cAMD"
589   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
590     return VendorSignatures::AUTHENTIC_AMD;
591 
592   return VendorSignatures::UNKNOWN;
593 }
594 
595 } // namespace x86
596 } // namespace detail
597 } // namespace sys
598 } // namespace llvm
599 
600 using namespace llvm::sys::detail::x86;
601 
/// getX86CpuIDAndInfoEx - Execute the cpuid instruction for leaf \p value and
/// subleaf \p subleaf, and store the resulting EAX/EBX/ECX/EDX register
/// values through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// The leaf is passed in EAX and the subleaf in ECX.
///
/// \returns false when cpuid was executed and the outputs were written, and
/// true when cpuid can't be run in this build configuration (in which case
/// the outputs are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually:
  // rbx is stashed in rsi before cpuid, and the exchange afterwards both
  // restores rbx and leaves cpuid's EBX result in rsi (the "=S" output).
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/exchange sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  // GCC/Clang, but neither __x86_64__ nor __i386__ is defined.
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC: use the subleaf-aware intrinsic.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
640 
// Read extended control register 0 (XCR0). Used to detect features such as
// AVX.
//
// The low 32 bits of the register are stored through rEAX and the high 32
// bits through rEDX. Returns false when the register was read, and true when
// no implementation is available for the compiler in use (outputs untouched).
//
// NOTE(review): presumably callers must first confirm via cpuid that the
// xgetbv instruction is usable on the host - verify against the call sites.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX is loaded with 0 as the instruction's input.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // MSVC: the intrinsic returns the full 64-bit value; split it in halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
658 
/// Decode the processor family and model from the EAX value of cpuid leaf 1.
///
/// \param EAX the raw EAX register value.
/// \param Family receives the family ID (bits 8-11), plus the extended
/// family ID (bits 20-27) when the base family is 0xF.
/// \param Model receives the model ID (bits 4-7), with the extended model ID
/// (bits 16-19) as the high nibble when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  // The extended family ID only applies when the family ID is F.
  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;

  // The extended model ID applies when the family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
671 
672 static StringRef
673 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
674                                 const unsigned *Features,
675                                 unsigned *Type, unsigned *Subtype) {
676   auto testFeature = [&](unsigned F) {
677     return (Features[F / 32] & (1U << (F % 32))) != 0;
678   };
679 
680   StringRef CPU;
681 
682   switch (Family) {
683   case 3:
684     CPU = "i386";
685     break;
686   case 4:
687     CPU = "i486";
688     break;
689   case 5:
690     if (testFeature(X86::FEATURE_MMX)) {
691       CPU = "pentium-mmx";
692       break;
693     }
694     CPU = "pentium";
695     break;
696   case 6:
697     switch (Model) {
698     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
699                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
700                // mobile processor, Intel Core 2 Extreme processor, Intel
701                // Pentium Dual-Core processor, Intel Xeon processor, model
702                // 0Fh. All processors are manufactured using the 65 nm process.
703     case 0x16: // Intel Celeron processor model 16h. All processors are
704                // manufactured using the 65 nm process
705       CPU = "core2";
706       *Type = X86::INTEL_CORE2;
707       break;
708     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
709                // 17h. All processors are manufactured using the 45 nm process.
710                //
711                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
712     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
713                // the 45 nm process.
714       CPU = "penryn";
715       *Type = X86::INTEL_CORE2;
716       break;
717     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
718                // processors are manufactured using the 45 nm process.
719     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
720                // As found in a Summer 2010 model iMac.
721     case 0x1f:
722     case 0x2e:              // Nehalem EX
723       CPU = "nehalem";
724       *Type = X86::INTEL_COREI7;
725       *Subtype = X86::INTEL_COREI7_NEHALEM;
726       break;
727     case 0x25: // Intel Core i7, laptop version.
728     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
729                // processors are manufactured using the 32 nm process.
730     case 0x2f: // Westmere EX
731       CPU = "westmere";
732       *Type = X86::INTEL_COREI7;
733       *Subtype = X86::INTEL_COREI7_WESTMERE;
734       break;
735     case 0x2a: // Intel Core i7 processor. All processors are manufactured
736                // using the 32 nm process.
737     case 0x2d:
738       CPU = "sandybridge";
739       *Type = X86::INTEL_COREI7;
740       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
741       break;
742     case 0x3a:
743     case 0x3e:              // Ivy Bridge EP
744       CPU = "ivybridge";
745       *Type = X86::INTEL_COREI7;
746       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
747       break;
748 
749     // Haswell:
750     case 0x3c:
751     case 0x3f:
752     case 0x45:
753     case 0x46:
754       CPU = "haswell";
755       *Type = X86::INTEL_COREI7;
756       *Subtype = X86::INTEL_COREI7_HASWELL;
757       break;
758 
759     // Broadwell:
760     case 0x3d:
761     case 0x47:
762     case 0x4f:
763     case 0x56:
764       CPU = "broadwell";
765       *Type = X86::INTEL_COREI7;
766       *Subtype = X86::INTEL_COREI7_BROADWELL;
767       break;
768 
769     // Skylake:
770     case 0x4e:              // Skylake mobile
771     case 0x5e:              // Skylake desktop
772     case 0x8e:              // Kaby Lake mobile
773     case 0x9e:              // Kaby Lake desktop
774     case 0xa5:              // Comet Lake-H/S
775     case 0xa6:              // Comet Lake-U
776       CPU = "skylake";
777       *Type = X86::INTEL_COREI7;
778       *Subtype = X86::INTEL_COREI7_SKYLAKE;
779       break;
780 
781     // Rocketlake:
782     case 0xa7:
783       CPU = "rocketlake";
784       *Type = X86::INTEL_COREI7;
785       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
786       break;
787 
788     // Skylake Xeon:
789     case 0x55:
790       *Type = X86::INTEL_COREI7;
791       if (testFeature(X86::FEATURE_AVX512BF16)) {
792         CPU = "cooperlake";
793         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
794       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
795         CPU = "cascadelake";
796         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
797       } else {
798         CPU = "skylake-avx512";
799         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
800       }
801       break;
802 
803     // Cannonlake:
804     case 0x66:
805       CPU = "cannonlake";
806       *Type = X86::INTEL_COREI7;
807       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
808       break;
809 
810     // Icelake:
811     case 0x7d:
812     case 0x7e:
813       CPU = "icelake-client";
814       *Type = X86::INTEL_COREI7;
815       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
816       break;
817 
818     // Tigerlake:
819     case 0x8c:
820     case 0x8d:
821       CPU = "tigerlake";
822       *Type = X86::INTEL_COREI7;
823       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
824       break;
825 
826     // Alderlake:
827     case 0x97:
828     case 0x9a:
829     // Gracemont
830     case 0xbe:
831     // Raptorlake:
832     case 0xb7:
833     case 0xba:
834     case 0xbf:
835     // Meteorlake:
836     case 0xaa:
837     case 0xac:
838       CPU = "alderlake";
839       *Type = X86::INTEL_COREI7;
840       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
841       break;
842 
843     // Arrowlake:
844     case 0xc5:
845       CPU = "arrowlake";
846       *Type = X86::INTEL_COREI7;
847       *Subtype = X86::INTEL_COREI7_ARROWLAKE;
848       break;
849 
850     // Arrowlake S:
851     case 0xc6:
852     // Lunarlake:
853     case 0xbd:
854       CPU = "arrowlake-s";
855       *Type = X86::INTEL_COREI7;
856       *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
857       break;
858 
859     // Pantherlake:
860     case 0xcc:
861       CPU = "pantherlake";
862       *Type = X86::INTEL_COREI7;
863       *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
864       break;
865 
866     // Graniterapids:
867     case 0xad:
868       CPU = "graniterapids";
869       *Type = X86::INTEL_COREI7;
870       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
871       break;
872 
873     // Granite Rapids D:
874     case 0xae:
875       CPU = "graniterapids-d";
876       *Type = X86::INTEL_COREI7;
877       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
878       break;
879 
880     // Icelake Xeon:
881     case 0x6a:
882     case 0x6c:
883       CPU = "icelake-server";
884       *Type = X86::INTEL_COREI7;
885       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
886       break;
887 
888     // Emerald Rapids:
889     case 0xcf:
890     // Sapphire Rapids:
891     case 0x8f:
892       CPU = "sapphirerapids";
893       *Type = X86::INTEL_COREI7;
894       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
895       break;
896 
897     case 0x1c: // Most 45 nm Intel Atom processors
898     case 0x26: // 45 nm Atom Lincroft
899     case 0x27: // 32 nm Atom Medfield
900     case 0x35: // 32 nm Atom Midview
901     case 0x36: // 32 nm Atom Midview
902       CPU = "bonnell";
903       *Type = X86::INTEL_BONNELL;
904       break;
905 
906     // Atom Silvermont codes from the Intel software optimization guide.
907     case 0x37:
908     case 0x4a:
909     case 0x4d:
910     case 0x5a:
911     case 0x5d:
912     case 0x4c: // really airmont
913       CPU = "silvermont";
914       *Type = X86::INTEL_SILVERMONT;
915       break;
916     // Goldmont:
917     case 0x5c: // Apollo Lake
918     case 0x5f: // Denverton
919       CPU = "goldmont";
920       *Type = X86::INTEL_GOLDMONT;
921       break;
922     case 0x7a:
923       CPU = "goldmont-plus";
924       *Type = X86::INTEL_GOLDMONT_PLUS;
925       break;
926     case 0x86:
927     case 0x8a: // Lakefield
928     case 0x96: // Elkhart Lake
929     case 0x9c: // Jasper Lake
930       CPU = "tremont";
931       *Type = X86::INTEL_TREMONT;
932       break;
933 
934     // Sierraforest:
935     case 0xaf:
936       CPU = "sierraforest";
937       *Type = X86::INTEL_SIERRAFOREST;
938       break;
939 
940     // Grandridge:
941     case 0xb6:
942       CPU = "grandridge";
943       *Type = X86::INTEL_GRANDRIDGE;
944       break;
945 
946     // Clearwaterforest:
947     case 0xdd:
948       CPU = "clearwaterforest";
949       *Type = X86::INTEL_CLEARWATERFOREST;
950       break;
951 
952     // Xeon Phi (Knights Landing + Knights Mill):
953     case 0x57:
954       CPU = "knl";
955       *Type = X86::INTEL_KNL;
956       break;
957     case 0x85:
958       CPU = "knm";
959       *Type = X86::INTEL_KNM;
960       break;
961 
962     default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
964       // They're used above to keep the code in sync with compiler-rt.
965       // TODO detect tigerlake host from model
966       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
967         CPU = "tigerlake";
968       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
969         CPU = "icelake-client";
970       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
971         CPU = "cannonlake";
972       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
973         CPU = "cooperlake";
974       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
975         CPU = "cascadelake";
976       } else if (testFeature(X86::FEATURE_AVX512VL)) {
977         CPU = "skylake-avx512";
978       } else if (testFeature(X86::FEATURE_AVX512ER)) {
979         CPU = "knl";
980       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
981         if (testFeature(X86::FEATURE_SHA))
982           CPU = "goldmont";
983         else
984           CPU = "skylake";
985       } else if (testFeature(X86::FEATURE_ADX)) {
986         CPU = "broadwell";
987       } else if (testFeature(X86::FEATURE_AVX2)) {
988         CPU = "haswell";
989       } else if (testFeature(X86::FEATURE_AVX)) {
990         CPU = "sandybridge";
991       } else if (testFeature(X86::FEATURE_SSE4_2)) {
992         if (testFeature(X86::FEATURE_MOVBE))
993           CPU = "silvermont";
994         else
995           CPU = "nehalem";
996       } else if (testFeature(X86::FEATURE_SSE4_1)) {
997         CPU = "penryn";
998       } else if (testFeature(X86::FEATURE_SSSE3)) {
999         if (testFeature(X86::FEATURE_MOVBE))
1000           CPU = "bonnell";
1001         else
1002           CPU = "core2";
1003       } else if (testFeature(X86::FEATURE_64BIT)) {
1004         CPU = "core2";
1005       } else if (testFeature(X86::FEATURE_SSE3)) {
1006         CPU = "yonah";
1007       } else if (testFeature(X86::FEATURE_SSE2)) {
1008         CPU = "pentium-m";
1009       } else if (testFeature(X86::FEATURE_SSE)) {
1010         CPU = "pentium3";
1011       } else if (testFeature(X86::FEATURE_MMX)) {
1012         CPU = "pentium2";
1013       } else {
1014         CPU = "pentiumpro";
1015       }
1016       break;
1017     }
1018     break;
1019   case 15: {
1020     if (testFeature(X86::FEATURE_64BIT)) {
1021       CPU = "nocona";
1022       break;
1023     }
1024     if (testFeature(X86::FEATURE_SSE3)) {
1025       CPU = "prescott";
1026       break;
1027     }
1028     CPU = "pentium4";
1029     break;
1030   }
1031   default:
1032     break; // Unknown.
1033   }
1034 
1035   return CPU;
1036 }
1037 
1038 static StringRef
1039 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1040                               const unsigned *Features,
1041                               unsigned *Type, unsigned *Subtype) {
1042   auto testFeature = [&](unsigned F) {
1043     return (Features[F / 32] & (1U << (F % 32))) != 0;
1044   };
1045 
1046   StringRef CPU;
1047 
1048   switch (Family) {
1049   case 4:
1050     CPU = "i486";
1051     break;
1052   case 5:
1053     CPU = "pentium";
1054     switch (Model) {
1055     case 6:
1056     case 7:
1057       CPU = "k6";
1058       break;
1059     case 8:
1060       CPU = "k6-2";
1061       break;
1062     case 9:
1063     case 13:
1064       CPU = "k6-3";
1065       break;
1066     case 10:
1067       CPU = "geode";
1068       break;
1069     }
1070     break;
1071   case 6:
1072     if (testFeature(X86::FEATURE_SSE)) {
1073       CPU = "athlon-xp";
1074       break;
1075     }
1076     CPU = "athlon";
1077     break;
1078   case 15:
1079     if (testFeature(X86::FEATURE_SSE3)) {
1080       CPU = "k8-sse3";
1081       break;
1082     }
1083     CPU = "k8";
1084     break;
1085   case 16:
1086     CPU = "amdfam10";
1087     *Type = X86::AMDFAM10H; // "amdfam10"
1088     switch (Model) {
1089     case 2:
1090       *Subtype = X86::AMDFAM10H_BARCELONA;
1091       break;
1092     case 4:
1093       *Subtype = X86::AMDFAM10H_SHANGHAI;
1094       break;
1095     case 8:
1096       *Subtype = X86::AMDFAM10H_ISTANBUL;
1097       break;
1098     }
1099     break;
1100   case 20:
1101     CPU = "btver1";
1102     *Type = X86::AMD_BTVER1;
1103     break;
1104   case 21:
1105     CPU = "bdver1";
1106     *Type = X86::AMDFAM15H;
1107     if (Model >= 0x60 && Model <= 0x7f) {
1108       CPU = "bdver4";
1109       *Subtype = X86::AMDFAM15H_BDVER4;
1110       break; // 60h-7Fh: Excavator
1111     }
1112     if (Model >= 0x30 && Model <= 0x3f) {
1113       CPU = "bdver3";
1114       *Subtype = X86::AMDFAM15H_BDVER3;
1115       break; // 30h-3Fh: Steamroller
1116     }
1117     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1118       CPU = "bdver2";
1119       *Subtype = X86::AMDFAM15H_BDVER2;
1120       break; // 02h, 10h-1Fh: Piledriver
1121     }
1122     if (Model <= 0x0f) {
1123       *Subtype = X86::AMDFAM15H_BDVER1;
1124       break; // 00h-0Fh: Bulldozer
1125     }
1126     break;
1127   case 22:
1128     CPU = "btver2";
1129     *Type = X86::AMD_BTVER2;
1130     break;
1131   case 23:
1132     CPU = "znver1";
1133     *Type = X86::AMDFAM17H;
1134     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1135       CPU = "znver2";
1136       *Subtype = X86::AMDFAM17H_ZNVER2;
1137       break; // 30h-3fh, 71h: Zen2
1138     }
1139     if (Model <= 0x0f) {
1140       *Subtype = X86::AMDFAM17H_ZNVER1;
1141       break; // 00h-0Fh: Zen1
1142     }
1143     break;
1144   case 25:
1145     CPU = "znver3";
1146     *Type = X86::AMDFAM19H;
1147     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
1148       // Family 19h Models 00h-0Fh - Zen3
1149       // Family 19h Models 20h-2Fh - Zen3
1150       // Family 19h Models 30h-3Fh - Zen3
1151       // Family 19h Models 40h-4Fh - Zen3+
1152       // Family 19h Models 50h-5Fh - Zen3+
1153       *Subtype = X86::AMDFAM19H_ZNVER3;
1154       break;
1155     }
1156     if ((Model >= 0x10 && Model <= 0x1f) ||
1157         (Model >= 0x60 && Model <= 0x74) ||
1158         (Model >= 0x78 && Model <= 0x7b) ||
1159         (Model >= 0xA0 && Model <= 0xAf)) {
1160       CPU = "znver4";
1161       *Subtype = X86::AMDFAM19H_ZNVER4;
1162       break; //  "znver4"
1163     }
1164     break; // family 19h
1165   default:
1166     break; // Unknown AMD CPU.
1167   }
1168 
1169   return CPU;
1170 }
1171 
1172 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1173                                  unsigned *Features) {
1174   unsigned EAX, EBX;
1175 
1176   auto setFeature = [&](unsigned F) {
1177     Features[F / 32] |= 1U << (F % 32);
1178   };
1179 
1180   if ((EDX >> 15) & 1)
1181     setFeature(X86::FEATURE_CMOV);
1182   if ((EDX >> 23) & 1)
1183     setFeature(X86::FEATURE_MMX);
1184   if ((EDX >> 25) & 1)
1185     setFeature(X86::FEATURE_SSE);
1186   if ((EDX >> 26) & 1)
1187     setFeature(X86::FEATURE_SSE2);
1188 
1189   if ((ECX >> 0) & 1)
1190     setFeature(X86::FEATURE_SSE3);
1191   if ((ECX >> 1) & 1)
1192     setFeature(X86::FEATURE_PCLMUL);
1193   if ((ECX >> 9) & 1)
1194     setFeature(X86::FEATURE_SSSE3);
1195   if ((ECX >> 12) & 1)
1196     setFeature(X86::FEATURE_FMA);
1197   if ((ECX >> 19) & 1)
1198     setFeature(X86::FEATURE_SSE4_1);
1199   if ((ECX >> 20) & 1) {
1200     setFeature(X86::FEATURE_SSE4_2);
1201     setFeature(X86::FEATURE_CRC32);
1202   }
1203   if ((ECX >> 23) & 1)
1204     setFeature(X86::FEATURE_POPCNT);
1205   if ((ECX >> 25) & 1)
1206     setFeature(X86::FEATURE_AES);
1207 
1208   if ((ECX >> 22) & 1)
1209     setFeature(X86::FEATURE_MOVBE);
1210 
1211   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1212   // indicates that the AVX registers will be saved and restored on context
1213   // switch, then we have full AVX support.
1214   const unsigned AVXBits = (1 << 27) | (1 << 28);
1215   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1216                 ((EAX & 0x6) == 0x6);
1217 #if defined(__APPLE__)
1218   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1219   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1220   // set right now.
1221   bool HasAVX512Save = true;
1222 #else
1223   // AVX512 requires additional context to be saved by the OS.
1224   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1225 #endif
1226 
1227   if (HasAVX)
1228     setFeature(X86::FEATURE_AVX);
1229 
1230   bool HasLeaf7 =
1231       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1232 
1233   if (HasLeaf7 && ((EBX >> 3) & 1))
1234     setFeature(X86::FEATURE_BMI);
1235   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1236     setFeature(X86::FEATURE_AVX2);
1237   if (HasLeaf7 && ((EBX >> 8) & 1))
1238     setFeature(X86::FEATURE_BMI2);
1239   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1240     setFeature(X86::FEATURE_AVX512F);
1241   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1242     setFeature(X86::FEATURE_AVX512DQ);
1243   if (HasLeaf7 && ((EBX >> 19) & 1))
1244     setFeature(X86::FEATURE_ADX);
1245   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1246     setFeature(X86::FEATURE_AVX512IFMA);
1247   if (HasLeaf7 && ((EBX >> 23) & 1))
1248     setFeature(X86::FEATURE_CLFLUSHOPT);
1249   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1250     setFeature(X86::FEATURE_AVX512PF);
1251   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1252     setFeature(X86::FEATURE_AVX512ER);
1253   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1254     setFeature(X86::FEATURE_AVX512CD);
1255   if (HasLeaf7 && ((EBX >> 29) & 1))
1256     setFeature(X86::FEATURE_SHA);
1257   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1258     setFeature(X86::FEATURE_AVX512BW);
1259   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1260     setFeature(X86::FEATURE_AVX512VL);
1261 
1262   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1263     setFeature(X86::FEATURE_AVX512VBMI);
1264   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1265     setFeature(X86::FEATURE_AVX512VBMI2);
1266   if (HasLeaf7 && ((ECX >> 8) & 1))
1267     setFeature(X86::FEATURE_GFNI);
1268   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1269     setFeature(X86::FEATURE_VPCLMULQDQ);
1270   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1271     setFeature(X86::FEATURE_AVX512VNNI);
1272   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1273     setFeature(X86::FEATURE_AVX512BITALG);
1274   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1275     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1276 
1277   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1278     setFeature(X86::FEATURE_AVX5124VNNIW);
1279   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1280     setFeature(X86::FEATURE_AVX5124FMAPS);
1281   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1282     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1283 
1284   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1285   // return all 0s for invalid subleaves so check the limit.
1286   bool HasLeaf7Subleaf1 =
1287       HasLeaf7 && EAX >= 1 &&
1288       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1289   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1290     setFeature(X86::FEATURE_AVX512BF16);
1291 
1292   unsigned MaxExtLevel;
1293   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1294 
1295   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1296                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1297   if (HasExtLeaf1 && ((ECX >> 6) & 1))
1298     setFeature(X86::FEATURE_SSE4_A);
1299   if (HasExtLeaf1 && ((ECX >> 11) & 1))
1300     setFeature(X86::FEATURE_XOP);
1301   if (HasExtLeaf1 && ((ECX >> 16) & 1))
1302     setFeature(X86::FEATURE_FMA4);
1303 
1304   if (HasExtLeaf1 && ((EDX >> 29) & 1))
1305     setFeature(X86::FEATURE_64BIT);
1306 }
1307 
1308 StringRef sys::getHostCPUName() {
1309   unsigned MaxLeaf = 0;
1310   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1311   if (Vendor == VendorSignatures::UNKNOWN)
1312     return "generic";
1313 
1314   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1315   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1316 
1317   unsigned Family = 0, Model = 0;
1318   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1319   detectX86FamilyModel(EAX, &Family, &Model);
1320   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1321 
1322   // These aren't consumed in this file, but we try to keep some source code the
1323   // same or similar to compiler-rt.
1324   unsigned Type = 0;
1325   unsigned Subtype = 0;
1326 
1327   StringRef CPU;
1328 
1329   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1330     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1331                                           &Subtype);
1332   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1333     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1334                                         &Subtype);
1335   }
1336 
1337   if (!CPU.empty())
1338     return CPU;
1339 
1340   return "generic";
1341 }
1342 
1343 #elif defined(__APPLE__) && defined(__powerpc__)
1344 StringRef sys::getHostCPUName() {
1345   host_basic_info_data_t hostInfo;
1346   mach_msg_type_number_t infoCount;
1347 
1348   infoCount = HOST_BASIC_INFO_COUNT;
1349   mach_port_t hostPort = mach_host_self();
1350   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1351             &infoCount);
1352   mach_port_deallocate(mach_task_self(), hostPort);
1353 
1354   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1355     return "generic";
1356 
1357   switch (hostInfo.cpu_subtype) {
1358   case CPU_SUBTYPE_POWERPC_601:
1359     return "601";
1360   case CPU_SUBTYPE_POWERPC_602:
1361     return "602";
1362   case CPU_SUBTYPE_POWERPC_603:
1363     return "603";
1364   case CPU_SUBTYPE_POWERPC_603e:
1365     return "603e";
1366   case CPU_SUBTYPE_POWERPC_603ev:
1367     return "603ev";
1368   case CPU_SUBTYPE_POWERPC_604:
1369     return "604";
1370   case CPU_SUBTYPE_POWERPC_604e:
1371     return "604e";
1372   case CPU_SUBTYPE_POWERPC_620:
1373     return "620";
1374   case CPU_SUBTYPE_POWERPC_750:
1375     return "750";
1376   case CPU_SUBTYPE_POWERPC_7400:
1377     return "7400";
1378   case CPU_SUBTYPE_POWERPC_7450:
1379     return "7450";
1380   case CPU_SUBTYPE_POWERPC_970:
1381     return "970";
1382   default:;
1383   }
1384 
1385   return "generic";
1386 }
1387 #elif defined(__linux__) && defined(__powerpc__)
1388 StringRef sys::getHostCPUName() {
1389   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1390   StringRef Content = P ? P->getBuffer() : "";
1391   return detail::getHostCPUNameForPowerPC(Content);
1392 }
1393 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1394 StringRef sys::getHostCPUName() {
1395   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1396   StringRef Content = P ? P->getBuffer() : "";
1397   return detail::getHostCPUNameForARM(Content);
1398 }
1399 #elif defined(__linux__) && defined(__s390x__)
1400 StringRef sys::getHostCPUName() {
1401   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1402   StringRef Content = P ? P->getBuffer() : "";
1403   return detail::getHostCPUNameForS390x(Content);
1404 }
1405 #elif defined(__MVS__)
1406 StringRef sys::getHostCPUName() {
1407   // Get pointer to Communications Vector Table (CVT).
1408   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1409   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1410   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1411   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1412   // of address.
1413   int ReadValue = *StartToCVTOffset;
1414   // Explicitly clear the high order bit.
1415   ReadValue = (ReadValue & 0x7FFFFFFF);
1416   char *CVT = reinterpret_cast<char *>(ReadValue);
1417   // The model number is located in the CVT prefix at offset -6 and stored as
1418   // signless packed decimal.
1419   uint16_t Id = *(uint16_t *)&CVT[-6];
1420   // Convert number to integer.
1421   Id = decodePackedBCD<uint16_t>(Id, false);
1422   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1423   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1424   // extension can only be used if bit CVTVEF is on.
1425   bool HaveVectorSupport = CVT[244] & 0x80;
1426   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1427 }
1428 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1429 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1430 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1431 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1432 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1433 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1434 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1435 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1436 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1437 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1438 
1439 StringRef sys::getHostCPUName() {
1440   uint32_t Family;
1441   size_t Length = sizeof(Family);
1442   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1443 
1444   switch (Family) {
1445   case CPUFAMILY_ARM_SWIFT:
1446     return "swift";
1447   case CPUFAMILY_ARM_CYCLONE:
1448     return "apple-a7";
1449   case CPUFAMILY_ARM_TYPHOON:
1450     return "apple-a8";
1451   case CPUFAMILY_ARM_TWISTER:
1452     return "apple-a9";
1453   case CPUFAMILY_ARM_HURRICANE:
1454     return "apple-a10";
1455   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1456     return "apple-a11";
1457   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1458     return "apple-a12";
1459   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1460     return "apple-a13";
1461   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1462     return "apple-m1";
1463   default:
1464     // Default to the newest CPU we know about.
1465     return "apple-m1";
1466   }
1467 }
1468 #elif defined(_AIX)
1469 StringRef sys::getHostCPUName() {
1470   switch (_system_configuration.implementation) {
1471   case POWER_4:
1472     if (_system_configuration.version == PV_4_3)
1473       return "970";
1474     return "pwr4";
1475   case POWER_5:
1476     if (_system_configuration.version == PV_5)
1477       return "pwr5";
1478     return "pwr5x";
1479   case POWER_6:
1480     if (_system_configuration.version == PV_6_Compat)
1481       return "pwr6";
1482     return "pwr6x";
1483   case POWER_7:
1484     return "pwr7";
1485   case POWER_8:
1486     return "pwr8";
1487   case POWER_9:
1488     return "pwr9";
1489 // TODO: simplify this once the macro is available in all OS levels.
1490 #ifdef POWER_10
1491   case POWER_10:
1492 #else
1493   case 0x40000:
1494 #endif
1495     return "pwr10";
1496   default:
1497     return "generic";
1498   }
1499 }
1500 #elif defined(__loongarch__)
1501 StringRef sys::getHostCPUName() {
1502   // Use processor id to detect cpu name.
1503   uint32_t processor_id;
1504   __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1505   switch (processor_id & 0xff00) {
1506   case 0xc000: // Loongson 64bit, 4-issue
1507     return "la464";
1508   // TODO: Others.
1509   default:
1510     break;
1511   }
1512   return "generic";
1513 }
1514 #elif defined(__riscv)
1515 StringRef sys::getHostCPUName() {
1516 #if defined(__linux__)
1517   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1518   StringRef Content = P ? P->getBuffer() : "";
1519   return detail::getHostCPUNameForRISCV(Content);
1520 #else
1521 #if __riscv_xlen == 64
1522   return "generic-rv64";
1523 #elif __riscv_xlen == 32
1524   return "generic-rv32";
1525 #else
1526 #error "Unhandled value of __riscv_xlen"
1527 #endif
1528 #endif
1529 }
1530 #elif defined(__sparc__)
1531 #if defined(__linux__)
1532 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1533   SmallVector<StringRef> Lines;
1534   ProcCpuinfoContent.split(Lines, "\n");
1535 
1536   // Look for cpu line to determine cpu name
1537   StringRef Cpu;
1538   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1539     if (Lines[I].starts_with("cpu")) {
1540       Cpu = Lines[I].substr(5).ltrim("\t :");
1541       break;
1542     }
1543   }
1544 
1545   return StringSwitch<const char *>(Cpu)
1546       .StartsWith("SuperSparc", "supersparc")
1547       .StartsWith("HyperSparc", "hypersparc")
1548       .StartsWith("SpitFire", "ultrasparc")
1549       .StartsWith("BlackBird", "ultrasparc")
1550       .StartsWith("Sabre", " ultrasparc")
1551       .StartsWith("Hummingbird", "ultrasparc")
1552       .StartsWith("Cheetah", "ultrasparc3")
1553       .StartsWith("Jalapeno", "ultrasparc3")
1554       .StartsWith("Jaguar", "ultrasparc3")
1555       .StartsWith("Panther", "ultrasparc3")
1556       .StartsWith("Serrano", "ultrasparc3")
1557       .StartsWith("UltraSparc T1", "niagara")
1558       .StartsWith("UltraSparc T2", "niagara2")
1559       .StartsWith("UltraSparc T3", "niagara3")
1560       .StartsWith("UltraSparc T4", "niagara4")
1561       .StartsWith("UltraSparc T5", "niagara4")
1562       .StartsWith("LEON", "leon3")
1563       // niagara7/m8 not supported by LLVM yet.
1564       .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1565       .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1566       .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1567       .Default("generic");
1568 }
1569 #endif
1570 
1571 StringRef sys::getHostCPUName() {
1572 #if defined(__linux__)
1573   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1574   StringRef Content = P ? P->getBuffer() : "";
1575   return detail::getHostCPUNameForSPARC(Content);
1576 #elif defined(__sun__) && defined(__svr4__)
1577   char *buf = NULL;
1578   kstat_ctl_t *kc;
1579   kstat_t *ksp;
1580   kstat_named_t *brand = NULL;
1581 
1582   kc = kstat_open();
1583   if (kc != NULL) {
1584     ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1585     if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1586         ksp->ks_type == KSTAT_TYPE_NAMED)
1587       brand =
1588           (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1589     if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1590       buf = KSTAT_NAMED_STR_PTR(brand);
1591   }
1592   kstat_close(kc);
1593 
1594   return StringSwitch<const char *>(buf)
1595       .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1596       .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1597       .Case("TMS390Z55",
1598             "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1599       .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1600       .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1601       .Case("RT623", "hypersparc")   // Ross hyperSPARC
1602       .Case("RT625", "hypersparc")
1603       .Case("RT626", "hypersparc")
1604       .Case("UltraSPARC-I", "ultrasparc")
1605       .Case("UltraSPARC-II", "ultrasparc")
1606       .Case("UltraSPARC-IIe", "ultrasparc")
1607       .Case("UltraSPARC-IIi", "ultrasparc")
1608       .Case("SPARC64-III", "ultrasparc")
1609       .Case("SPARC64-IV", "ultrasparc")
1610       .Case("UltraSPARC-III", "ultrasparc3")
1611       .Case("UltraSPARC-III+", "ultrasparc3")
1612       .Case("UltraSPARC-IIIi", "ultrasparc3")
1613       .Case("UltraSPARC-IIIi+", "ultrasparc3")
1614       .Case("UltraSPARC-IV", "ultrasparc3")
1615       .Case("UltraSPARC-IV+", "ultrasparc3")
1616       .Case("SPARC64-V", "ultrasparc3")
1617       .Case("SPARC64-VI", "ultrasparc3")
1618       .Case("SPARC64-VII", "ultrasparc3")
1619       .Case("UltraSPARC-T1", "niagara")
1620       .Case("UltraSPARC-T2", "niagara2")
1621       .Case("UltraSPARC-T2", "niagara2")
1622       .Case("UltraSPARC-T2+", "niagara2")
1623       .Case("SPARC-T3", "niagara3")
1624       .Case("SPARC-T4", "niagara4")
1625       .Case("SPARC-T5", "niagara4")
1626       // niagara7/m8 not supported by LLVM yet.
1627       .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1628       .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1629       .Case("SPARC-M8", "niagara4" /* "m8" */)
1630       .Default("generic");
1631 #else
1632   return "generic";
1633 #endif
1634 }
1635 #else
1636 StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
namespace sys {
namespace detail {
namespace x86 {

/// Stub used in the fallback branch of the host-detection conditionals:
/// always reports an unknown vendor and never writes through \p MaxLeaf.
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
  return VendorSignatures::UNKNOWN;
}

} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm
1650 #endif
1651 
1652 #if defined(__i386__) || defined(_M_IX86) || \
1653     defined(__x86_64__) || defined(_M_X64)
1654 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1655   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1656   unsigned MaxLevel;
1657 
1658   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1659     return false;
1660 
1661   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1662 
1663   Features["cx8"]    = (EDX >>  8) & 1;
1664   Features["cmov"]   = (EDX >> 15) & 1;
1665   Features["mmx"]    = (EDX >> 23) & 1;
1666   Features["fxsr"]   = (EDX >> 24) & 1;
1667   Features["sse"]    = (EDX >> 25) & 1;
1668   Features["sse2"]   = (EDX >> 26) & 1;
1669 
1670   Features["sse3"]   = (ECX >>  0) & 1;
1671   Features["pclmul"] = (ECX >>  1) & 1;
1672   Features["ssse3"]  = (ECX >>  9) & 1;
1673   Features["cx16"]   = (ECX >> 13) & 1;
1674   Features["sse4.1"] = (ECX >> 19) & 1;
1675   Features["sse4.2"] = (ECX >> 20) & 1;
1676   Features["crc32"]  = Features["sse4.2"];
1677   Features["movbe"]  = (ECX >> 22) & 1;
1678   Features["popcnt"] = (ECX >> 23) & 1;
1679   Features["aes"]    = (ECX >> 25) & 1;
1680   Features["rdrnd"]  = (ECX >> 30) & 1;
1681 
1682   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1683   // indicates that the AVX registers will be saved and restored on context
1684   // switch, then we have full AVX support.
1685   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1686   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1687 #if defined(__APPLE__)
1688   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1689   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1690   // set right now.
1691   bool HasAVX512Save = true;
1692 #else
1693   // AVX512 requires additional context to be saved by the OS.
1694   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1695 #endif
1696   // AMX requires additional context to be saved by the OS.
1697   const unsigned AMXBits = (1 << 17) | (1 << 18);
1698   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1699 
1700   Features["avx"]   = HasAVXSave;
1701   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1702   // Only enable XSAVE if OS has enabled support for saving YMM state.
1703   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1704   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1705 
1706   unsigned MaxExtLevel;
1707   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1708 
1709   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1710                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1711   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1712   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1713   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1714   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1715   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1716   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1717   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1718   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1719   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1720 
1721   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1722 
1723   // Miscellaneous memory related features, detected by
1724   // using the 0x80000008 leaf of the CPUID instruction
1725   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1726                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1727   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1728   Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1729   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1730 
1731   bool HasLeaf7 =
1732       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1733 
1734   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1735   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1736   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1737   // AVX2 is only supported if we have the OS save support from AVX.
1738   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1739   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1740   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1741   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1742   // AVX512 is only supported if the OS supports the context save for it.
1743   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1744   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1745   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1746   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1747   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1748   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1749   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1750   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1751   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1752   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1753   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1754   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1755   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1756 
1757   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1758   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1759   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1760   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1761   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1762   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1763   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1764   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1765   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1766   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1767   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1768   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1769   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1770   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1771   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1772   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1773   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1774   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1775 
1776   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1777   Features["avx512vp2intersect"] =
1778       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1779   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1780   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which provide information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1791   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1792   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1793   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1794   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1795   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1796   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1797   // return all 0s for invalid subleaves so check the limit.
1798   bool HasLeaf7Subleaf1 =
1799       HasLeaf7 && EAX >= 1 &&
1800       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1801   Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1802   Features["sm3"]        = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1803   Features["sm4"]        = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1804   Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1805   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1806   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1807   Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1808   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1809   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1810   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1811   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1812   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1813   Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1814   Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1815   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1816   Features["usermsr"]  = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
1817   Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
1818 
1819   bool HasLeafD = MaxLevel >= 0xd &&
1820                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1821 
1822   // Only enable XSAVE if OS has enabled support for saving YMM state.
1823   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1824   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1825   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1826 
1827   bool HasLeaf14 = MaxLevel >= 0x14 &&
1828                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1829 
1830   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1831 
1832   bool HasLeaf19 =
1833       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1834   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1835 
1836   bool HasLeaf24 =
1837       MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1838   Features["avx10.1-512"] =
1839       Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
1840 
1841   return true;
1842 }
1843 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1844 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1845   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1846   if (!P)
1847     return false;
1848 
1849   SmallVector<StringRef, 32> Lines;
1850   P->getBuffer().split(Lines, "\n");
1851 
1852   SmallVector<StringRef, 32> CPUFeatures;
1853 
1854   // Look for the CPU features.
1855   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1856     if (Lines[I].starts_with("Features")) {
1857       Lines[I].split(CPUFeatures, ' ');
1858       break;
1859     }
1860 
1861 #if defined(__aarch64__)
1862   // Keep track of which crypto features we have seen
1863   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1864   uint32_t crypto = 0;
1865 #endif
1866 
1867   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1868     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1869 #if defined(__aarch64__)
1870                                    .Case("asimd", "neon")
1871                                    .Case("fp", "fp-armv8")
1872                                    .Case("crc32", "crc")
1873                                    .Case("atomics", "lse")
1874                                    .Case("sve", "sve")
1875                                    .Case("sve2", "sve2")
1876 #else
1877                                    .Case("half", "fp16")
1878                                    .Case("neon", "neon")
1879                                    .Case("vfpv3", "vfp3")
1880                                    .Case("vfpv3d16", "vfp3d16")
1881                                    .Case("vfpv4", "vfp4")
1882                                    .Case("idiva", "hwdiv-arm")
1883                                    .Case("idivt", "hwdiv")
1884 #endif
1885                                    .Default("");
1886 
1887 #if defined(__aarch64__)
1888     // We need to check crypto separately since we need all of the crypto
1889     // extensions to enable the subtarget feature
1890     if (CPUFeatures[I] == "aes")
1891       crypto |= CAP_AES;
1892     else if (CPUFeatures[I] == "pmull")
1893       crypto |= CAP_PMULL;
1894     else if (CPUFeatures[I] == "sha1")
1895       crypto |= CAP_SHA1;
1896     else if (CPUFeatures[I] == "sha2")
1897       crypto |= CAP_SHA2;
1898 #endif
1899 
1900     if (LLVMFeatureStr != "")
1901       Features[LLVMFeatureStr] = true;
1902   }
1903 
1904 #if defined(__aarch64__)
1905   // If we have all crypto bits we can add the feature
1906   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1907     Features["crypto"] = true;
1908 #endif
1909 
1910   return true;
1911 }
1912 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1913 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1914   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1915     Features["neon"] = true;
1916   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1917     Features["crc"] = true;
1918   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1919     Features["crypto"] = true;
1920 
1921   return true;
1922 }
1923 #elif defined(__linux__) && defined(__loongarch__)
1924 #include <sys/auxv.h>
1925 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1926   unsigned long hwcap = getauxval(AT_HWCAP);
1927   bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1928   uint32_t cpucfg2 = 0x2;
1929   __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1930 
1931   Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1932   Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1933 
1934   Features["lsx"] = hwcap & (1UL << 4);  // HWCAP_LOONGARCH_LSX
1935   Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1936   Features["lvz"] = hwcap & (1UL << 9);  // HWCAP_LOONGARCH_LVZ
1937 
1938   return true;
1939 }
1940 #else
1941 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1942 #endif
1943 
1944 #if __APPLE__
/// Overrides the architecture of \p T with the one this translation unit is
/// being compiled for, as identified by the compiler's predefined arch macros.
///
/// Each branch sets the architecture enum first and then the architecture
/// name string. The more specific macros are tested before the general ones
/// (__arm64e__ before __aarch64__, __x86_64h__ before __x86_64__).
/// NOTE(review): presumably the general macro is also defined in those cases,
/// which would make this ordering significant -- confirm before reordering.
///
/// \returns \p T, but with the Host's arch spliced in.
static Triple withHostArch(Triple T) {
#if defined(__arm__)
  T.setArch(Triple::arm);
  T.setArchName("arm");
#elif defined(__arm64e__)
  // arm64e additionally carries a sub-architecture.
  T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
  T.setArchName("arm64e");
#elif defined(__aarch64__)
  T.setArch(Triple::aarch64);
  T.setArchName("arm64");
#elif defined(__x86_64h__)
  // Same architecture enum as plain x86_64; only the name differs.
  T.setArch(Triple::x86_64);
  T.setArchName("x86_64h");
#elif defined(__x86_64__)
  T.setArch(Triple::x86_64);
  T.setArchName("x86_64");
#elif defined(__i386__)
  T.setArch(Triple::x86);
  T.setArchName("i386");
#elif defined(__powerpc__)
  T.setArch(Triple::ppc);
  T.setArchName("powerpc");
#else
  // Fail the build rather than silently return an unadjusted triple.
#  error "Unimplemented host arch fixup"
#endif
  return T;
}
1973 #endif
1974 
1975 std::string sys::getProcessTriple() {
1976   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1977   Triple PT(Triple::normalize(TargetTripleString));
1978 
1979 #if __APPLE__
1980   /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
1981   /// the slices. This fixes that up.
1982   PT = withHostArch(PT);
1983 #endif
1984 
1985   if (sizeof(void *) == 8 && PT.isArch32Bit())
1986     PT = PT.get64BitArchVariant();
1987   if (sizeof(void *) == 4 && PT.isArch64Bit())
1988     PT = PT.get32BitArchVariant();
1989 
1990   return PT.str();
1991 }
1992 
1993 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
1994 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
1995   std::string CPU = std::string(sys::getHostCPUName());
1996   if (CPU == "generic")
1997     CPU = "(unknown)";
1998   OS << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
1999      << "  Host CPU: " << CPU << '\n';
2000 #endif
2001 }
2002