1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Config/llvm-config.h"
21 #include "llvm/Support/BCD.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/X86TargetParser.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <assert.h>
28 #include <string.h>
29 
30 // Include the platform-specific parts of this class.
31 #ifdef LLVM_ON_UNIX
32 #include "Unix/Host.inc"
33 #include <sched.h>
34 #endif
35 #ifdef _WIN32
36 #include "Windows/Host.inc"
37 #endif
38 #ifdef _MSC_VER
39 #include <intrin.h>
40 #endif
41 #if defined(__APPLE__) && (!defined(__x86_64__))
42 #include <mach/host_info.h>
43 #include <mach/mach.h>
44 #include <mach/mach_host.h>
45 #include <mach/machine.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 
51 #define DEBUG_TYPE "host-detection"
52 
53 //===----------------------------------------------------------------------===//
54 //
55 //  Implementations of the CPU detection routines
56 //
57 //===----------------------------------------------------------------------===//
58 
59 using namespace llvm;
60 
61 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()62     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
63   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
64       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
65   if (std::error_code EC = Text.getError()) {
66     llvm::errs() << "Can't read "
67                  << "/proc/cpuinfo: " << EC.message() << "\n";
68     return nullptr;
69   }
70   return std::move(*Text);
71 }
72 
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)73 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
74   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
75   // and so we must use an operating-system interface to determine the current
76   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
77   const char *generic = "generic";
78 
79   // The cpu line is second (after the 'processor: 0' line), so if this
80   // buffer is too small then something has changed (or is wrong).
81   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
82   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
83 
84   StringRef::const_iterator CIP = CPUInfoStart;
85 
86   StringRef::const_iterator CPUStart = 0;
87   size_t CPULen = 0;
88 
89   // We need to find the first line which starts with cpu, spaces, and a colon.
90   // After the colon, there may be some additional spaces and then the cpu type.
91   while (CIP < CPUInfoEnd && CPUStart == 0) {
92     if (CIP < CPUInfoEnd && *CIP == '\n')
93       ++CIP;
94 
95     if (CIP < CPUInfoEnd && *CIP == 'c') {
96       ++CIP;
97       if (CIP < CPUInfoEnd && *CIP == 'p') {
98         ++CIP;
99         if (CIP < CPUInfoEnd && *CIP == 'u') {
100           ++CIP;
101           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
102             ++CIP;
103 
104           if (CIP < CPUInfoEnd && *CIP == ':') {
105             ++CIP;
106             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
107               ++CIP;
108 
109             if (CIP < CPUInfoEnd) {
110               CPUStart = CIP;
111               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
112                                           *CIP != ',' && *CIP != '\n'))
113                 ++CIP;
114               CPULen = CIP - CPUStart;
115             }
116           }
117         }
118       }
119     }
120 
121     if (CPUStart == 0)
122       while (CIP < CPUInfoEnd && *CIP != '\n')
123         ++CIP;
124   }
125 
126   if (CPUStart == 0)
127     return generic;
128 
129   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
130       .Case("604e", "604e")
131       .Case("604", "604")
132       .Case("7400", "7400")
133       .Case("7410", "7400")
134       .Case("7447", "7400")
135       .Case("7455", "7450")
136       .Case("G4", "g4")
137       .Case("POWER4", "970")
138       .Case("PPC970FX", "970")
139       .Case("PPC970MP", "970")
140       .Case("G5", "g5")
141       .Case("POWER5", "g5")
142       .Case("A2", "a2")
143       .Case("POWER6", "pwr6")
144       .Case("POWER7", "pwr7")
145       .Case("POWER8", "pwr8")
146       .Case("POWER8E", "pwr8")
147       .Case("POWER8NVL", "pwr8")
148       .Case("POWER9", "pwr9")
149       .Case("POWER10", "pwr10")
150       // FIXME: If we get a simulator or machine with the capabilities of
151       // mcpu=future, we should revisit this and add the name reported by the
152       // simulator/machine.
153       .Default(generic);
154 }
155 
getHostCPUNameForARM(StringRef ProcCpuinfoContent)156 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
157   // The cpuid register on arm is not accessible from user space. On Linux,
158   // it is exposed through the /proc/cpuinfo file.
159 
160   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
161   // in all cases.
162   SmallVector<StringRef, 32> Lines;
163   ProcCpuinfoContent.split(Lines, "\n");
164 
165   // Look for the CPU implementer line.
166   StringRef Implementer;
167   StringRef Hardware;
168   StringRef Part;
169   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
170     if (Lines[I].startswith("CPU implementer"))
171       Implementer = Lines[I].substr(15).ltrim("\t :");
172     if (Lines[I].startswith("Hardware"))
173       Hardware = Lines[I].substr(8).ltrim("\t :");
174     if (Lines[I].startswith("CPU part"))
175       Part = Lines[I].substr(8).ltrim("\t :");
176   }
177 
178   if (Implementer == "0x41") { // ARM Ltd.
179     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
180     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
181     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
182       return "cortex-a53";
183 
184 
185     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
186     // values correspond to the "Part number" in the CP15/c0 register. The
187     // contents are specified in the various processor manuals.
188     // This corresponds to the Main ID Register in Technical Reference Manuals.
189     // and is used in programs like sys-utils
190     return StringSwitch<const char *>(Part)
191         .Case("0x926", "arm926ej-s")
192         .Case("0xb02", "mpcore")
193         .Case("0xb36", "arm1136j-s")
194         .Case("0xb56", "arm1156t2-s")
195         .Case("0xb76", "arm1176jz-s")
196         .Case("0xc08", "cortex-a8")
197         .Case("0xc09", "cortex-a9")
198         .Case("0xc0f", "cortex-a15")
199         .Case("0xc20", "cortex-m0")
200         .Case("0xc23", "cortex-m3")
201         .Case("0xc24", "cortex-m4")
202         .Case("0xd22", "cortex-m55")
203         .Case("0xd02", "cortex-a34")
204         .Case("0xd04", "cortex-a35")
205         .Case("0xd03", "cortex-a53")
206         .Case("0xd07", "cortex-a57")
207         .Case("0xd08", "cortex-a72")
208         .Case("0xd09", "cortex-a73")
209         .Case("0xd0a", "cortex-a75")
210         .Case("0xd0b", "cortex-a76")
211         .Case("0xd0d", "cortex-a77")
212         .Case("0xd41", "cortex-a78")
213         .Case("0xd44", "cortex-x1")
214         .Case("0xd0c", "neoverse-n1")
215         .Case("0xd49", "neoverse-n2")
216         .Default("generic");
217   }
218 
219   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
220     return StringSwitch<const char *>(Part)
221       .Case("0x516", "thunderx2t99")
222       .Case("0x0516", "thunderx2t99")
223       .Case("0xaf", "thunderx2t99")
224       .Case("0x0af", "thunderx2t99")
225       .Case("0xa1", "thunderxt88")
226       .Case("0x0a1", "thunderxt88")
227       .Default("generic");
228   }
229 
230   if (Implementer == "0x46") { // Fujitsu Ltd.
231     return StringSwitch<const char *>(Part)
232       .Case("0x001", "a64fx")
233       .Default("generic");
234   }
235 
236   if (Implementer == "0x4e") { // NVIDIA Corporation
237     return StringSwitch<const char *>(Part)
238         .Case("0x004", "carmel")
239         .Default("generic");
240   }
241 
242   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
243     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
244     // values correspond to the "Part number" in the CP15/c0 register. The
245     // contents are specified in the various processor manuals.
246     return StringSwitch<const char *>(Part)
247       .Case("0xd01", "tsv110")
248       .Default("generic");
249 
250   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
251     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
252     // values correspond to the "Part number" in the CP15/c0 register. The
253     // contents are specified in the various processor manuals.
254     return StringSwitch<const char *>(Part)
255         .Case("0x06f", "krait") // APQ8064
256         .Case("0x201", "kryo")
257         .Case("0x205", "kryo")
258         .Case("0x211", "kryo")
259         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
260         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
261         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
262         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
263         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
264         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
265         .Case("0xc00", "falkor")
266         .Case("0xc01", "saphira")
267         .Default("generic");
268   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
269     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
270     // any predictive pattern across variants and parts.
271     unsigned Variant = 0, Part = 0;
272 
273     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
274     // number, corresponding to the Variant bits in the CP15/C0 register.
275     for (auto I : Lines)
276       if (I.consume_front("CPU variant"))
277         I.ltrim("\t :").getAsInteger(0, Variant);
278 
279     // Look for the CPU part line, whose value is a 3 digit hexadecimal
280     // number, corresponding to the PartNum bits in the CP15/C0 register.
281     for (auto I : Lines)
282       if (I.consume_front("CPU part"))
283         I.ltrim("\t :").getAsInteger(0, Part);
284 
285     unsigned Exynos = (Variant << 12) | Part;
286     switch (Exynos) {
287     default:
288       // Default by falling through to Exynos M3.
289       LLVM_FALLTHROUGH;
290     case 0x1002:
291       return "exynos-m3";
292     case 0x1003:
293       return "exynos-m4";
294     }
295   }
296 
297   return "generic";
298 }
299 
300 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)301 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
302   switch (Id) {
303     case 2064:  // z900 not supported by LLVM
304     case 2066:
305     case 2084:  // z990 not supported by LLVM
306     case 2086:
307     case 2094:  // z9-109 not supported by LLVM
308     case 2096:
309       return "generic";
310     case 2097:
311     case 2098:
312       return "z10";
313     case 2817:
314     case 2818:
315       return "z196";
316     case 2827:
317     case 2828:
318       return "zEC12";
319     case 2964:
320     case 2965:
321       return HaveVectorSupport? "z13" : "zEC12";
322     case 3906:
323     case 3907:
324       return HaveVectorSupport? "z14" : "zEC12";
325     case 8561:
326     case 8562:
327       return HaveVectorSupport? "z15" : "zEC12";
328     case 3931:
329     case 3932:
330     default:
331       return HaveVectorSupport? "arch14" : "zEC12";
332   }
333 }
334 } // end anonymous namespace
335 
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)336 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
337   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
338 
339   // The "processor 0:" line comes after a fair amount of other information,
340   // including a cache breakdown, but this should be plenty.
341   SmallVector<StringRef, 32> Lines;
342   ProcCpuinfoContent.split(Lines, "\n");
343 
344   // Look for the CPU features.
345   SmallVector<StringRef, 32> CPUFeatures;
346   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
347     if (Lines[I].startswith("features")) {
348       size_t Pos = Lines[I].find(':');
349       if (Pos != StringRef::npos) {
350         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
351         break;
352       }
353     }
354 
355   // We need to check for the presence of vector support independently of
356   // the machine type, since we may only use the vector register set when
357   // supported by the kernel (and hypervisor).
358   bool HaveVectorSupport = false;
359   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
360     if (CPUFeatures[I] == "vx")
361       HaveVectorSupport = true;
362   }
363 
364   // Now check the processor machine type.
365   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
366     if (Lines[I].startswith("processor ")) {
367       size_t Pos = Lines[I].find("machine = ");
368       if (Pos != StringRef::npos) {
369         Pos += sizeof("machine = ") - 1;
370         unsigned int Id;
371         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
372           return getCPUNameFromS390Model(Id, HaveVectorSupport);
373       }
374       break;
375     }
376   }
377 
378   return "generic";
379 }
380 
getHostCPUNameForBPF()381 StringRef sys::detail::getHostCPUNameForBPF() {
382 #if !defined(__linux__) || !defined(__x86_64__)
383   return "generic";
384 #else
385   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
386       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
387     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
388       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
389       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
390       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
391       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
392       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
393       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
394       /* BPF_EXIT_INSN() */
395       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
396 
397   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
398       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
399     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
400       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
401       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
402       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
403       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
404       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
405       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
406       /* BPF_EXIT_INSN() */
407       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
408 
409   struct bpf_prog_load_attr {
410     uint32_t prog_type;
411     uint32_t insn_cnt;
412     uint64_t insns;
413     uint64_t license;
414     uint32_t log_level;
415     uint32_t log_size;
416     uint64_t log_buf;
417     uint32_t kern_version;
418     uint32_t prog_flags;
419   } attr = {};
420   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
421   attr.insn_cnt = 5;
422   attr.insns = (uint64_t)v3_insns;
423   attr.license = (uint64_t)"DUMMY";
424 
425   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
426                    sizeof(attr));
427   if (fd >= 0) {
428     close(fd);
429     return "v3";
430   }
431 
432   /* Clear the whole attr in case its content changed by syscall. */
433   memset(&attr, 0, sizeof(attr));
434   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
435   attr.insn_cnt = 5;
436   attr.insns = (uint64_t)v2_insns;
437   attr.license = (uint64_t)"DUMMY";
438   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
439   if (fd >= 0) {
440     close(fd);
441     return "v2";
442   }
443   return "v1";
444 #endif
445 }
446 
447 #if defined(__i386__) || defined(_M_IX86) || \
448     defined(__x86_64__) || defined(_M_X64)
449 
// The i386 probe below was copied from clang's cpuid.h (__get_cpuid_max).
// It was motivated by bug reports of OpenSSL crashing on CPUs without CPUID
// support: on i386 the presence of CPUID is therefore checked first by trying
// to toggle the corresponding eflags bit (0x00200000).
// The cpuid.h header itself was dropped because of PR30384.
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx

/// \returns false only when built for i386 with a GNU-compatible compiler and
/// the eflags probe shows that the CPUID instruction is unavailable; true in
/// every other configuration.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  return CpuIdSupported != 0;
#else
  // No probe is performed in any other configuration.
  return true;
#endif
}
484 
/// Run the cpuid instruction for leaf \p value and store the resulting
/// EAX/EBX/ECX/EDX contents through the four output pointers.
///
/// \returns false when cpuid was executed, true when it cannot be run with
/// this compiler/target combination (the outputs are then left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually by
  // parking it in rsi, which is also where the EBX result is handed back.
  // FIXME: should we save this for Clang?
  unsigned OutA, OutB, OutC, OutD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value));
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#elif defined(__i386__)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned OutA, OutB, OutC, OutD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value));
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
522 
523 namespace llvm {
524 namespace sys {
525 namespace detail {
526 namespace x86 {
527 
getVendorSignature(unsigned * MaxLeaf)528 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
529   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
530   if (MaxLeaf == nullptr)
531     MaxLeaf = &EAX;
532   else
533     *MaxLeaf = 0;
534 
535   if (!isCpuIdSupported())
536     return VendorSignatures::UNKNOWN;
537 
538   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
539     return VendorSignatures::UNKNOWN;
540 
541   // "Genu ineI ntel"
542   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
543     return VendorSignatures::GENUINE_INTEL;
544 
545   // "Auth enti cAMD"
546   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
547     return VendorSignatures::AUTHENTIC_AMD;
548 
549   return VendorSignatures::UNKNOWN;
550 }
551 
552 } // namespace x86
553 } // namespace detail
554 } // namespace sys
555 } // namespace llvm
556 
557 using namespace llvm::sys::detail::x86;
558 
/// Run the cpuid instruction for leaf \p value and sub-leaf \p subleaf and
/// store the resulting EAX/EBX/ECX/EDX contents through the output pointers.
///
/// \returns false when cpuid was executed, true when it cannot be run with
/// this compiler/target combination (the outputs are then left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually by
  // parking it in rsi, which is also where the EBX result is handed back.
  // FIXME: should we save this for Clang?
  unsigned OutA, OutB, OutC, OutD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value), "c"(subleaf));
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#elif defined(__i386__)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned OutA, OutB, OutC, OutD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value), "c"(subleaf));
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
597 
/// Read control register 0 (XCR0). Used to detect features such as AVX.
///
/// \param rEAX receives the low 32 bits of the register.
/// \param rEDX receives the high 32 bits of the register.
/// \returns false when the register was read, true when no way of reading it
/// is available with this compiler (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = XCR0;
  *rEDX = XCR0 >> 32;
  return false;
#else
  return true;
#endif
}
615 
/// Decode the family and model numbers from a cpuid EAX signature value.
///
/// \param EAX    the signature value to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
///               when the base family is 0xF.
/// \param Model  receives bits 4-7, plus the extended model (bits 16-19)
///               shifted into the high nibble when the base family is 6 or
///               0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family only applies when the base family is F; the extended
  // model applies when the base family is 6 or F.
  const bool UsesExtFamily = BaseFamily == 0xf;
  const bool UsesExtModel = BaseFamily == 6 || BaseFamily == 0xf;

  *Family = UsesExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
628 
/// Map an Intel family/model pair, plus a set of feature bits, to an LLVM CPU
/// name.
///
/// \param Family   family number to dispatch on.
/// \param Model    model number; only consulted when \p Family is 6.
/// \param Features bit set indexed by X86::FEATURE_* values and stored as
///                 32-bit words (feature F is bit F % 32 of word F / 32).
/// \param Type     written with an X86::INTEL_* value for the family 6 models
///                 listed explicitly below; left untouched otherwise.
/// \param Subtype  written with an X86::INTEL_COREI7_* value for the listed
///                 Core i7-class family 6 models; left untouched otherwise.
/// \returns the CPU name, or an empty StringRef when \p Family is not one of
/// 3, 4, 5, 6 or 15.
static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
  // Test a single X86::FEATURE_* bit in the packed Features array.
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 3:
    CPU = "i386";
    break;
  case 4:
    CPU = "i486";
    break;
  case 5:
    if (testFeature(X86::FEATURE_MMX)) {
      CPU = "pentium-mmx";
      break;
    }
    CPU = "pentium";
    break;
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e:              // Nehalem EX
      CPU = "nehalem";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e:              // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e:              // Skylake mobile
    case 0x5e:              // Skylake desktop
    case 0x8e:              // Kaby Lake mobile
    case 0x9e:              // Kaby Lake desktop
    case 0xa5:              // Comet Lake-H/S
    case 0xa6:              // Comet Lake-U
      CPU = "skylake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    // Model 0x55 is shared by three server generations, which are told apart
    // by their AVX-512 feature bits (newest checked first).
    case 0x55:
      *Type = X86::INTEL_COREI7;
      if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
      CPU = "alderlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // The chain below is order-dependent: the first feature that is present
      // decides the name, so do not reorder the tests.
      // TODO detect tigerlake host from model
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_AVX512ER)) {
        CPU = "knl";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    // Family 15 is classified purely by feature bits; Model is not consulted.
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  default:
    break; // Unknown.
  }

  return CPU;
}
925 
926 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)927 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
928                               const unsigned *Features,
929                               unsigned *Type, unsigned *Subtype) {
930   auto testFeature = [&](unsigned F) {
931     return (Features[F / 32] & (1U << (F % 32))) != 0;
932   };
933 
934   StringRef CPU;
935 
936   switch (Family) {
937   case 4:
938     CPU = "i486";
939     break;
940   case 5:
941     CPU = "pentium";
942     switch (Model) {
943     case 6:
944     case 7:
945       CPU = "k6";
946       break;
947     case 8:
948       CPU = "k6-2";
949       break;
950     case 9:
951     case 13:
952       CPU = "k6-3";
953       break;
954     case 10:
955       CPU = "geode";
956       break;
957     }
958     break;
959   case 6:
960     if (testFeature(X86::FEATURE_SSE)) {
961       CPU = "athlon-xp";
962       break;
963     }
964     CPU = "athlon";
965     break;
966   case 15:
967     if (testFeature(X86::FEATURE_SSE3)) {
968       CPU = "k8-sse3";
969       break;
970     }
971     CPU = "k8";
972     break;
973   case 16:
974     CPU = "amdfam10";
975     *Type = X86::AMDFAM10H; // "amdfam10"
976     switch (Model) {
977     case 2:
978       *Subtype = X86::AMDFAM10H_BARCELONA;
979       break;
980     case 4:
981       *Subtype = X86::AMDFAM10H_SHANGHAI;
982       break;
983     case 8:
984       *Subtype = X86::AMDFAM10H_ISTANBUL;
985       break;
986     }
987     break;
988   case 20:
989     CPU = "btver1";
990     *Type = X86::AMD_BTVER1;
991     break;
992   case 21:
993     CPU = "bdver1";
994     *Type = X86::AMDFAM15H;
995     if (Model >= 0x60 && Model <= 0x7f) {
996       CPU = "bdver4";
997       *Subtype = X86::AMDFAM15H_BDVER4;
998       break; // 60h-7Fh: Excavator
999     }
1000     if (Model >= 0x30 && Model <= 0x3f) {
1001       CPU = "bdver3";
1002       *Subtype = X86::AMDFAM15H_BDVER3;
1003       break; // 30h-3Fh: Steamroller
1004     }
1005     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1006       CPU = "bdver2";
1007       *Subtype = X86::AMDFAM15H_BDVER2;
1008       break; // 02h, 10h-1Fh: Piledriver
1009     }
1010     if (Model <= 0x0f) {
1011       *Subtype = X86::AMDFAM15H_BDVER1;
1012       break; // 00h-0Fh: Bulldozer
1013     }
1014     break;
1015   case 22:
1016     CPU = "btver2";
1017     *Type = X86::AMD_BTVER2;
1018     break;
1019   case 23:
1020     CPU = "znver1";
1021     *Type = X86::AMDFAM17H;
1022     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1023       CPU = "znver2";
1024       *Subtype = X86::AMDFAM17H_ZNVER2;
1025       break; // 30h-3fh, 71h: Zen2
1026     }
1027     if (Model <= 0x0f) {
1028       *Subtype = X86::AMDFAM17H_ZNVER1;
1029       break; // 00h-0Fh: Zen1
1030     }
1031     break;
1032   case 25:
1033     CPU = "znver3";
1034     *Type = X86::AMDFAM19H;
1035     if (Model <= 0x0f) {
1036       *Subtype = X86::AMDFAM19H_ZNVER3;
1037       break; // 00h-0Fh: Zen3
1038     }
1039     break;
1040   default:
1041     break; // Unknown AMD CPU.
1042   }
1043 
1044   return CPU;
1045 }
1046 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1047 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1048                                  unsigned *Features) {
1049   unsigned EAX, EBX;
1050 
1051   auto setFeature = [&](unsigned F) {
1052     Features[F / 32] |= 1U << (F % 32);
1053   };
1054 
1055   if ((EDX >> 15) & 1)
1056     setFeature(X86::FEATURE_CMOV);
1057   if ((EDX >> 23) & 1)
1058     setFeature(X86::FEATURE_MMX);
1059   if ((EDX >> 25) & 1)
1060     setFeature(X86::FEATURE_SSE);
1061   if ((EDX >> 26) & 1)
1062     setFeature(X86::FEATURE_SSE2);
1063 
1064   if ((ECX >> 0) & 1)
1065     setFeature(X86::FEATURE_SSE3);
1066   if ((ECX >> 1) & 1)
1067     setFeature(X86::FEATURE_PCLMUL);
1068   if ((ECX >> 9) & 1)
1069     setFeature(X86::FEATURE_SSSE3);
1070   if ((ECX >> 12) & 1)
1071     setFeature(X86::FEATURE_FMA);
1072   if ((ECX >> 19) & 1)
1073     setFeature(X86::FEATURE_SSE4_1);
1074   if ((ECX >> 20) & 1) {
1075     setFeature(X86::FEATURE_SSE4_2);
1076     setFeature(X86::FEATURE_CRC32);
1077   }
1078   if ((ECX >> 23) & 1)
1079     setFeature(X86::FEATURE_POPCNT);
1080   if ((ECX >> 25) & 1)
1081     setFeature(X86::FEATURE_AES);
1082 
1083   if ((ECX >> 22) & 1)
1084     setFeature(X86::FEATURE_MOVBE);
1085 
1086   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1087   // indicates that the AVX registers will be saved and restored on context
1088   // switch, then we have full AVX support.
1089   const unsigned AVXBits = (1 << 27) | (1 << 28);
1090   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1091                 ((EAX & 0x6) == 0x6);
1092 #if defined(__APPLE__)
1093   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1094   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1095   // set right now.
1096   bool HasAVX512Save = true;
1097 #else
1098   // AVX512 requires additional context to be saved by the OS.
1099   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1100 #endif
1101 
1102   if (HasAVX)
1103     setFeature(X86::FEATURE_AVX);
1104 
1105   bool HasLeaf7 =
1106       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1107 
1108   if (HasLeaf7 && ((EBX >> 3) & 1))
1109     setFeature(X86::FEATURE_BMI);
1110   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1111     setFeature(X86::FEATURE_AVX2);
1112   if (HasLeaf7 && ((EBX >> 8) & 1))
1113     setFeature(X86::FEATURE_BMI2);
1114   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1115     setFeature(X86::FEATURE_AVX512F);
1116   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1117     setFeature(X86::FEATURE_AVX512DQ);
1118   if (HasLeaf7 && ((EBX >> 19) & 1))
1119     setFeature(X86::FEATURE_ADX);
1120   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1121     setFeature(X86::FEATURE_AVX512IFMA);
1122   if (HasLeaf7 && ((EBX >> 23) & 1))
1123     setFeature(X86::FEATURE_CLFLUSHOPT);
1124   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1125     setFeature(X86::FEATURE_AVX512PF);
1126   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1127     setFeature(X86::FEATURE_AVX512ER);
1128   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1129     setFeature(X86::FEATURE_AVX512CD);
1130   if (HasLeaf7 && ((EBX >> 29) & 1))
1131     setFeature(X86::FEATURE_SHA);
1132   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1133     setFeature(X86::FEATURE_AVX512BW);
1134   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1135     setFeature(X86::FEATURE_AVX512VL);
1136 
1137   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1138     setFeature(X86::FEATURE_AVX512VBMI);
1139   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1140     setFeature(X86::FEATURE_AVX512VBMI2);
1141   if (HasLeaf7 && ((ECX >> 8) & 1))
1142     setFeature(X86::FEATURE_GFNI);
1143   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1144     setFeature(X86::FEATURE_VPCLMULQDQ);
1145   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1146     setFeature(X86::FEATURE_AVX512VNNI);
1147   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1148     setFeature(X86::FEATURE_AVX512BITALG);
1149   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1150     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1151 
1152   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1153     setFeature(X86::FEATURE_AVX5124VNNIW);
1154   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1155     setFeature(X86::FEATURE_AVX5124FMAPS);
1156   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1157     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1158 
1159   bool HasLeaf7Subleaf1 =
1160       MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1161   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1162     setFeature(X86::FEATURE_AVX512BF16);
1163 
1164   unsigned MaxExtLevel;
1165   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1166 
1167   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1168                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1169   if (HasExtLeaf1 && ((ECX >> 6) & 1))
1170     setFeature(X86::FEATURE_SSE4_A);
1171   if (HasExtLeaf1 && ((ECX >> 11) & 1))
1172     setFeature(X86::FEATURE_XOP);
1173   if (HasExtLeaf1 && ((ECX >> 16) & 1))
1174     setFeature(X86::FEATURE_FMA4);
1175 
1176   if (HasExtLeaf1 && ((EDX >> 29) & 1))
1177     setFeature(X86::FEATURE_64BIT);
1178 }
1179 
getHostCPUName()1180 StringRef sys::getHostCPUName() {
1181   unsigned MaxLeaf = 0;
1182   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1183   if (Vendor == VendorSignatures::UNKNOWN)
1184     return "generic";
1185 
1186   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1187   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1188 
1189   unsigned Family = 0, Model = 0;
1190   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1191   detectX86FamilyModel(EAX, &Family, &Model);
1192   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1193 
1194   // These aren't consumed in this file, but we try to keep some source code the
1195   // same or similar to compiler-rt.
1196   unsigned Type = 0;
1197   unsigned Subtype = 0;
1198 
1199   StringRef CPU;
1200 
1201   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1202     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1203                                           &Subtype);
1204   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1205     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1206                                         &Subtype);
1207   }
1208 
1209   if (!CPU.empty())
1210     return CPU;
1211 
1212   return "generic";
1213 }
1214 
1215 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1216 StringRef sys::getHostCPUName() {
1217   host_basic_info_data_t hostInfo;
1218   mach_msg_type_number_t infoCount;
1219 
1220   infoCount = HOST_BASIC_INFO_COUNT;
1221   mach_port_t hostPort = mach_host_self();
1222   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1223             &infoCount);
1224   mach_port_deallocate(mach_task_self(), hostPort);
1225 
1226   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1227     return "generic";
1228 
1229   switch (hostInfo.cpu_subtype) {
1230   case CPU_SUBTYPE_POWERPC_601:
1231     return "601";
1232   case CPU_SUBTYPE_POWERPC_602:
1233     return "602";
1234   case CPU_SUBTYPE_POWERPC_603:
1235     return "603";
1236   case CPU_SUBTYPE_POWERPC_603e:
1237     return "603e";
1238   case CPU_SUBTYPE_POWERPC_603ev:
1239     return "603ev";
1240   case CPU_SUBTYPE_POWERPC_604:
1241     return "604";
1242   case CPU_SUBTYPE_POWERPC_604e:
1243     return "604e";
1244   case CPU_SUBTYPE_POWERPC_620:
1245     return "620";
1246   case CPU_SUBTYPE_POWERPC_750:
1247     return "750";
1248   case CPU_SUBTYPE_POWERPC_7400:
1249     return "7400";
1250   case CPU_SUBTYPE_POWERPC_7450:
1251     return "7450";
1252   case CPU_SUBTYPE_POWERPC_970:
1253     return "970";
1254   default:;
1255   }
1256 
1257   return "generic";
1258 }
1259 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1260 StringRef sys::getHostCPUName() {
1261   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1262   StringRef Content = P ? P->getBuffer() : "";
1263   return detail::getHostCPUNameForPowerPC(Content);
1264 }
1265 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1266 StringRef sys::getHostCPUName() {
1267   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1268   StringRef Content = P ? P->getBuffer() : "";
1269   return detail::getHostCPUNameForARM(Content);
1270 }
1271 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1272 StringRef sys::getHostCPUName() {
1273   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1274   StringRef Content = P ? P->getBuffer() : "";
1275   return detail::getHostCPUNameForS390x(Content);
1276 }
1277 #elif defined(__MVS__)
// Implementation used when __MVS__ is defined: derives the CPU name from the
// machine model number and the vector-support flag, both read directly out of
// the Communications Vector Table (CVT) at fixed offsets.
StringRef sys::getHostCPUName() {
  // Get pointer to Communications Vector Table (CVT).
  // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
  // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
  int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
  // Since it's stored as a 31-bit pointer, get the 4 bytes from the start
  // of address.
  int ReadValue = *StartToCVTOffset;
  // Explicitly clear the high order bit.
  ReadValue = (ReadValue & 0x7FFFFFFF);
  char *CVT = reinterpret_cast<char *>(ReadValue);
  // The model number is located in the CVT prefix at offset -6 and stored as
  // signless packed decimal.
  uint16_t Id = *(uint16_t *)&CVT[-6];
  // Convert number to integer.
  Id = decodePackedBCD<uint16_t>(Id, false);
  // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
  // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
  // extension can only be used if bit CVTVEF is on.
  bool HaveVectorSupport = CVT[244] & 0x80;
  // Translate the decoded model number and vector flag into a CPU name.
  return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
1300 #elif defined(__APPLE__) && defined(__aarch64__)
getHostCPUName()1301 StringRef sys::getHostCPUName() {
1302   return "cyclone";
1303 }
1304 #elif defined(__APPLE__) && defined(__arm__)
getHostCPUName()1305 StringRef sys::getHostCPUName() {
1306   host_basic_info_data_t hostInfo;
1307   mach_msg_type_number_t infoCount;
1308 
1309   infoCount = HOST_BASIC_INFO_COUNT;
1310   mach_port_t hostPort = mach_host_self();
1311   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1312             &infoCount);
1313   mach_port_deallocate(mach_task_self(), hostPort);
1314 
1315   if (hostInfo.cpu_type != CPU_TYPE_ARM) {
1316     assert(false && "CPUType not equal to ARM should not be possible on ARM");
1317     return "generic";
1318   }
1319   switch (hostInfo.cpu_subtype) {
1320     case CPU_SUBTYPE_ARM_V7S:
1321       return "swift";
1322     default:;
1323     }
1324 
1325   return "generic";
1326 }
1327 #elif defined(_AIX)
getHostCPUName()1328 StringRef sys::getHostCPUName() {
1329   switch (_system_configuration.implementation) {
1330   case POWER_4:
1331     if (_system_configuration.version == PV_4_3)
1332       return "970";
1333     return "pwr4";
1334   case POWER_5:
1335     if (_system_configuration.version == PV_5)
1336       return "pwr5";
1337     return "pwr5x";
1338   case POWER_6:
1339     if (_system_configuration.version == PV_6_Compat)
1340       return "pwr6";
1341     return "pwr6x";
1342   case POWER_7:
1343     return "pwr7";
1344   case POWER_8:
1345     return "pwr8";
1346   case POWER_9:
1347     return "pwr9";
1348 // TODO: simplify this once the macro is available in all OS levels.
1349 #ifdef POWER_10
1350   case POWER_10:
1351 #else
1352   case 0x40000:
1353 #endif
1354     return "pwr10";
1355   default:
1356     return "generic";
1357   }
1358 }
1359 #elif defined(__riscv)
getHostCPUName()1360 StringRef sys::getHostCPUName() {
1361 #if __riscv_xlen == 64
1362   return "generic-rv64";
1363 #elif __riscv_xlen == 32
1364   return "generic-rv32";
1365 #else
1366 #error "Unhandled value of __riscv_xlen"
1367 #endif
1368 }
1369 #else
getHostCPUName()1370 StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
namespace sys {
namespace detail {
namespace x86 {

// Stub compiled in the fallback branch, next to the "generic"
// getHostCPUName(): always reports an unknown vendor and never writes to
// *MaxLeaf.
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
  return VendorSignatures::UNKNOWN;
}

} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm
1384 #endif
1385 
1386 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1387 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1388 // using the number of unique physical/core id pairs. The following
1389 // implementation reads the /proc/cpuinfo format on an x86_64 system.
computeHostNumPhysicalCores()1390 int computeHostNumPhysicalCores() {
1391   // Enabled represents the number of physical id/core id pairs with at least
1392   // one processor id enabled by the CPU affinity mask.
1393   cpu_set_t Affinity, Enabled;
1394   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1395     return -1;
1396   CPU_ZERO(&Enabled);
1397 
1398   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1399   // mmapped because it appears to have 0 size.
1400   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1401       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1402   if (std::error_code EC = Text.getError()) {
1403     llvm::errs() << "Can't read "
1404                  << "/proc/cpuinfo: " << EC.message() << "\n";
1405     return -1;
1406   }
1407   SmallVector<StringRef, 8> strs;
1408   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1409                              /*KeepEmpty=*/false);
1410   int CurProcessor = -1;
1411   int CurPhysicalId = -1;
1412   int CurSiblings = -1;
1413   int CurCoreId = -1;
1414   for (StringRef Line : strs) {
1415     std::pair<StringRef, StringRef> Data = Line.split(':');
1416     auto Name = Data.first.trim();
1417     auto Val = Data.second.trim();
1418     // These fields are available if the kernel is configured with CONFIG_SMP.
1419     if (Name == "processor")
1420       Val.getAsInteger(10, CurProcessor);
1421     else if (Name == "physical id")
1422       Val.getAsInteger(10, CurPhysicalId);
1423     else if (Name == "siblings")
1424       Val.getAsInteger(10, CurSiblings);
1425     else if (Name == "core id") {
1426       Val.getAsInteger(10, CurCoreId);
1427       // The processor id corresponds to an index into cpu_set_t.
1428       if (CPU_ISSET(CurProcessor, &Affinity))
1429         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1430     }
1431   }
1432   return CPU_COUNT(&Enabled);
1433 }
1434 #elif defined(__linux__) && defined(__powerpc__)
// Linux/PowerPC implementation: reports the number of CPUs in this process's
// affinity mask. Returns -1 if the mask cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxDynCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxDynCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxDynCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with the _S variant; plain
  // CPU_COUNT() only looks at sizeof(cpu_set_t) bytes of the mask.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1453 #elif defined(__linux__) && defined(__s390x__)
// Linux/s390x implementation: reports the number of processors currently
// online as given by sysconf(); whatever sysconf() returns is narrowed to int.
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1455 #elif defined(__APPLE__)
1456 #include <sys/param.h>
1457 #include <sys/sysctl.h>
1458 
1459 // Gets the number of *physical cores* on the machine.
computeHostNumPhysicalCores()1460 int computeHostNumPhysicalCores() {
1461   uint32_t count;
1462   size_t len = sizeof(count);
1463   sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
1464   if (count < 1) {
1465     int nm[2];
1466     nm[0] = CTL_HW;
1467     nm[1] = HW_AVAILCPU;
1468     sysctl(nm, 2, &count, &len, NULL, 0);
1469     if (count < 1)
1470       return -1;
1471   }
1472   return count;
1473 }
1474 #elif defined(__MVS__)
// Implementation used when __MVS__ is defined: follows the PSA -> CVT -> CSD
// pointer chain at fixed byte offsets and returns the number of online
// standard CPs stored in the CSD.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  // The PSA is addressed from address zero; the offsets above are applied to
  // it and to the tables reached from it.
  char *PSA = 0;
  // Each table pointer is read as an unsigned 32-bit value and widened.
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  // The count itself is read as a signed int.
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1496 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1497 // Defined in llvm/lib/Support/Windows/Threading.inc
1498 int computeHostNumPhysicalCores();
1499 #else
// Fallback for systems without a dedicated implementation: the physical core
// count is unknown, which is reported as -1.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1502 #endif
1503 
getHostNumPhysicalCores()1504 int sys::getHostNumPhysicalCores() {
1505   static int NumCores = computeHostNumPhysicalCores();
1506   return NumCores;
1507 }
1508 
1509 #if defined(__i386__) || defined(_M_IX86) || \
1510     defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1511 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1512   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1513   unsigned MaxLevel;
1514 
1515   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1516     return false;
1517 
1518   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1519 
1520   Features["cx8"]    = (EDX >>  8) & 1;
1521   Features["cmov"]   = (EDX >> 15) & 1;
1522   Features["mmx"]    = (EDX >> 23) & 1;
1523   Features["fxsr"]   = (EDX >> 24) & 1;
1524   Features["sse"]    = (EDX >> 25) & 1;
1525   Features["sse2"]   = (EDX >> 26) & 1;
1526 
1527   Features["sse3"]   = (ECX >>  0) & 1;
1528   Features["pclmul"] = (ECX >>  1) & 1;
1529   Features["ssse3"]  = (ECX >>  9) & 1;
1530   Features["cx16"]   = (ECX >> 13) & 1;
1531   Features["sse4.1"] = (ECX >> 19) & 1;
1532   Features["sse4.2"] = (ECX >> 20) & 1;
1533   Features["crc32"]  = Features["sse4.2"];
1534   Features["movbe"]  = (ECX >> 22) & 1;
1535   Features["popcnt"] = (ECX >> 23) & 1;
1536   Features["aes"]    = (ECX >> 25) & 1;
1537   Features["rdrnd"]  = (ECX >> 30) & 1;
1538 
1539   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1540   // indicates that the AVX registers will be saved and restored on context
1541   // switch, then we have full AVX support.
1542   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1543   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1544 #if defined(__APPLE__)
1545   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1546   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1547   // set right now.
1548   bool HasAVX512Save = true;
1549 #else
1550   // AVX512 requires additional context to be saved by the OS.
1551   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1552 #endif
1553   // AMX requires additional context to be saved by the OS.
1554   const unsigned AMXBits = (1 << 17) | (1 << 18);
1555   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1556 
1557   Features["avx"]   = HasAVXSave;
1558   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1559   // Only enable XSAVE if OS has enabled support for saving YMM state.
1560   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1561   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1562 
1563   unsigned MaxExtLevel;
1564   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1565 
1566   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1567                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1568   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1569   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1570   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1571   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1572   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1573   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1574   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1575   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1576   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1577 
1578   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1579 
1580   // Miscellaneous memory related features, detected by
1581   // using the 0x80000008 leaf of the CPUID instruction
1582   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1583                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1584   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1585   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1586 
1587   bool HasLeaf7 =
1588       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1589 
1590   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1591   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1592   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1593   // AVX2 is only supported if we have the OS save support from AVX.
1594   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1595   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1596   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1597   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1598   // AVX512 is only supported if the OS supports the context save for it.
1599   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1600   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1601   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1602   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1603   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1604   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1605   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1606   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1607   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1608   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1609   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1610   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1611   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1612 
1613   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1614   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1615   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1616   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1617   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1618   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1619   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1620   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1621   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1622   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1623   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1624   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1625   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1626   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1627   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1628   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1629   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1630   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1631 
1632   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1633   Features["avx512vp2intersect"] =
1634       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1635   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1636   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
1637   // There are two CPUID leafs which information associated with the pconfig
1638   // instruction:
1639   // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1640   // bit of EDX), while the EAX=0x1b leaf returns information on the
1641   // availability of specific pconfig leafs.
1642   // The target feature here only refers to the the first of these two.
1643   // Users might need to check for the availability of specific pconfig
1644   // leaves using cpuid, since that information is ignored while
1645   // detecting features using the "-march=native" flag.
1646   // For more info, see X86 ISA docs.
1647   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1648   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1649   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1650   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1651   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1652   bool HasLeaf7Subleaf1 =
1653       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1654   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1655   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1656   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1657 
1658   bool HasLeafD = MaxLevel >= 0xd &&
1659                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1660 
1661   // Only enable XSAVE if OS has enabled support for saving YMM state.
1662   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1663   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1664   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1665 
1666   bool HasLeaf14 = MaxLevel >= 0x14 &&
1667                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1668 
1669   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1670 
1671   bool HasLeaf19 =
1672       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1673   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1674 
1675   return true;
1676 }
1677 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1678 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1679   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1680   if (!P)
1681     return false;
1682 
1683   SmallVector<StringRef, 32> Lines;
1684   P->getBuffer().split(Lines, "\n");
1685 
1686   SmallVector<StringRef, 32> CPUFeatures;
1687 
1688   // Look for the CPU features.
1689   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1690     if (Lines[I].startswith("Features")) {
1691       Lines[I].split(CPUFeatures, ' ');
1692       break;
1693     }
1694 
1695 #if defined(__aarch64__)
1696   // Keep track of which crypto features we have seen
1697   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1698   uint32_t crypto = 0;
1699 #endif
1700 
1701   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1702     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1703 #if defined(__aarch64__)
1704                                    .Case("asimd", "neon")
1705                                    .Case("fp", "fp-armv8")
1706                                    .Case("crc32", "crc")
1707 #else
1708                                    .Case("half", "fp16")
1709                                    .Case("neon", "neon")
1710                                    .Case("vfpv3", "vfp3")
1711                                    .Case("vfpv3d16", "d16")
1712                                    .Case("vfpv4", "vfp4")
1713                                    .Case("idiva", "hwdiv-arm")
1714                                    .Case("idivt", "hwdiv")
1715 #endif
1716                                    .Default("");
1717 
1718 #if defined(__aarch64__)
1719     // We need to check crypto separately since we need all of the crypto
1720     // extensions to enable the subtarget feature
1721     if (CPUFeatures[I] == "aes")
1722       crypto |= CAP_AES;
1723     else if (CPUFeatures[I] == "pmull")
1724       crypto |= CAP_PMULL;
1725     else if (CPUFeatures[I] == "sha1")
1726       crypto |= CAP_SHA1;
1727     else if (CPUFeatures[I] == "sha2")
1728       crypto |= CAP_SHA2;
1729 #endif
1730 
1731     if (LLVMFeatureStr != "")
1732       Features[LLVMFeatureStr] = true;
1733   }
1734 
1735 #if defined(__aarch64__)
1736   // If we have all crypto bits we can add the feature
1737   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1738     Features["crypto"] = true;
1739 #endif
1740 
1741   return true;
1742 }
1743 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1744 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1745   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1746     Features["neon"] = true;
1747   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1748     Features["crc"] = true;
1749   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1750     Features["crypto"] = true;
1751 
1752   return true;
1753 }
1754 #else
getHostCPUFeatures(StringMap<bool> & Features)1755 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1756 #endif
1757 
getProcessTriple()1758 std::string sys::getProcessTriple() {
1759   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1760   Triple PT(Triple::normalize(TargetTripleString));
1761 
1762   if (sizeof(void *) == 8 && PT.isArch32Bit())
1763     PT = PT.get64BitArchVariant();
1764   if (sizeof(void *) == 4 && PT.isArch64Bit())
1765     PT = PT.get32BitArchVariant();
1766 
1767   return PT.str();
1768 }
1769