1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Config/llvm-config.h"
21 #include "llvm/Support/BCD.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/X86TargetParser.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <assert.h>
28 #include <string.h>
29 
30 // Include the platform-specific parts of this class.
31 #ifdef LLVM_ON_UNIX
32 #include "Unix/Host.inc"
33 #include <sched.h>
34 #endif
35 #ifdef _WIN32
36 #include "Windows/Host.inc"
37 #endif
38 #ifdef _MSC_VER
39 #include <intrin.h>
40 #endif
41 #if defined(__APPLE__) && (!defined(__x86_64__))
42 #include <mach/host_info.h>
43 #include <mach/mach.h>
44 #include <mach/mach_host.h>
45 #include <mach/machine.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 
51 #define DEBUG_TYPE "host-detection"
52 
53 //===----------------------------------------------------------------------===//
54 //
55 //  Implementations of the CPU detection routines
56 //
57 //===----------------------------------------------------------------------===//
58 
59 using namespace llvm;
60 
61 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()62     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
63   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
64       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
65   if (std::error_code EC = Text.getError()) {
66     llvm::errs() << "Can't read "
67                  << "/proc/cpuinfo: " << EC.message() << "\n";
68     return nullptr;
69   }
70   return std::move(*Text);
71 }
72 
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)73 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
74   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
75   // and so we must use an operating-system interface to determine the current
76   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
77   const char *generic = "generic";
78 
79   // The cpu line is second (after the 'processor: 0' line), so if this
80   // buffer is too small then something has changed (or is wrong).
81   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
82   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
83 
84   StringRef::const_iterator CIP = CPUInfoStart;
85 
86   StringRef::const_iterator CPUStart = 0;
87   size_t CPULen = 0;
88 
89   // We need to find the first line which starts with cpu, spaces, and a colon.
90   // After the colon, there may be some additional spaces and then the cpu type.
91   while (CIP < CPUInfoEnd && CPUStart == 0) {
92     if (CIP < CPUInfoEnd && *CIP == '\n')
93       ++CIP;
94 
95     if (CIP < CPUInfoEnd && *CIP == 'c') {
96       ++CIP;
97       if (CIP < CPUInfoEnd && *CIP == 'p') {
98         ++CIP;
99         if (CIP < CPUInfoEnd && *CIP == 'u') {
100           ++CIP;
101           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
102             ++CIP;
103 
104           if (CIP < CPUInfoEnd && *CIP == ':') {
105             ++CIP;
106             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
107               ++CIP;
108 
109             if (CIP < CPUInfoEnd) {
110               CPUStart = CIP;
111               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
112                                           *CIP != ',' && *CIP != '\n'))
113                 ++CIP;
114               CPULen = CIP - CPUStart;
115             }
116           }
117         }
118       }
119     }
120 
121     if (CPUStart == 0)
122       while (CIP < CPUInfoEnd && *CIP != '\n')
123         ++CIP;
124   }
125 
126   if (CPUStart == 0)
127     return generic;
128 
129   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
130       .Case("604e", "604e")
131       .Case("604", "604")
132       .Case("7400", "7400")
133       .Case("7410", "7400")
134       .Case("7447", "7400")
135       .Case("7455", "7450")
136       .Case("G4", "g4")
137       .Case("POWER4", "970")
138       .Case("PPC970FX", "970")
139       .Case("PPC970MP", "970")
140       .Case("G5", "g5")
141       .Case("POWER5", "g5")
142       .Case("A2", "a2")
143       .Case("POWER6", "pwr6")
144       .Case("POWER7", "pwr7")
145       .Case("POWER8", "pwr8")
146       .Case("POWER8E", "pwr8")
147       .Case("POWER8NVL", "pwr8")
148       .Case("POWER9", "pwr9")
149       .Case("POWER10", "pwr10")
150       // FIXME: If we get a simulator or machine with the capabilities of
151       // mcpu=future, we should revisit this and add the name reported by the
152       // simulator/machine.
153       .Default(generic);
154 }
155 
getHostCPUNameForARM(StringRef ProcCpuinfoContent)156 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
157   // The cpuid register on arm is not accessible from user space. On Linux,
158   // it is exposed through the /proc/cpuinfo file.
159 
160   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
161   // in all cases.
162   SmallVector<StringRef, 32> Lines;
163   ProcCpuinfoContent.split(Lines, "\n");
164 
165   // Look for the CPU implementer line.
166   StringRef Implementer;
167   StringRef Hardware;
168   StringRef Part;
169   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
170     if (Lines[I].startswith("CPU implementer"))
171       Implementer = Lines[I].substr(15).ltrim("\t :");
172     if (Lines[I].startswith("Hardware"))
173       Hardware = Lines[I].substr(8).ltrim("\t :");
174     if (Lines[I].startswith("CPU part"))
175       Part = Lines[I].substr(8).ltrim("\t :");
176   }
177 
178   if (Implementer == "0x41") { // ARM Ltd.
179     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
180     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
181     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
182       return "cortex-a53";
183 
184 
185     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
186     // values correspond to the "Part number" in the CP15/c0 register. The
187     // contents are specified in the various processor manuals.
188     // This corresponds to the Main ID Register in Technical Reference Manuals.
189     // and is used in programs like sys-utils
190     return StringSwitch<const char *>(Part)
191         .Case("0x926", "arm926ej-s")
192         .Case("0xb02", "mpcore")
193         .Case("0xb36", "arm1136j-s")
194         .Case("0xb56", "arm1156t2-s")
195         .Case("0xb76", "arm1176jz-s")
196         .Case("0xc08", "cortex-a8")
197         .Case("0xc09", "cortex-a9")
198         .Case("0xc0f", "cortex-a15")
199         .Case("0xc20", "cortex-m0")
200         .Case("0xc23", "cortex-m3")
201         .Case("0xc24", "cortex-m4")
202         .Case("0xd22", "cortex-m55")
203         .Case("0xd02", "cortex-a34")
204         .Case("0xd04", "cortex-a35")
205         .Case("0xd03", "cortex-a53")
206         .Case("0xd07", "cortex-a57")
207         .Case("0xd08", "cortex-a72")
208         .Case("0xd09", "cortex-a73")
209         .Case("0xd0a", "cortex-a75")
210         .Case("0xd0b", "cortex-a76")
211         .Case("0xd0d", "cortex-a77")
212         .Case("0xd41", "cortex-a78")
213         .Case("0xd44", "cortex-x1")
214         .Case("0xd0c", "neoverse-n1")
215         .Case("0xd49", "neoverse-n2")
216         .Default("generic");
217   }
218 
219   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
220     return StringSwitch<const char *>(Part)
221       .Case("0x516", "thunderx2t99")
222       .Case("0x0516", "thunderx2t99")
223       .Case("0xaf", "thunderx2t99")
224       .Case("0x0af", "thunderx2t99")
225       .Case("0xa1", "thunderxt88")
226       .Case("0x0a1", "thunderxt88")
227       .Default("generic");
228   }
229 
230   if (Implementer == "0x46") { // Fujitsu Ltd.
231     return StringSwitch<const char *>(Part)
232       .Case("0x001", "a64fx")
233       .Default("generic");
234   }
235 
236   if (Implementer == "0x4e") { // NVIDIA Corporation
237     return StringSwitch<const char *>(Part)
238         .Case("0x004", "carmel")
239         .Default("generic");
240   }
241 
242   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
243     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
244     // values correspond to the "Part number" in the CP15/c0 register. The
245     // contents are specified in the various processor manuals.
246     return StringSwitch<const char *>(Part)
247       .Case("0xd01", "tsv110")
248       .Default("generic");
249 
250   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
251     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
252     // values correspond to the "Part number" in the CP15/c0 register. The
253     // contents are specified in the various processor manuals.
254     return StringSwitch<const char *>(Part)
255         .Case("0x06f", "krait") // APQ8064
256         .Case("0x201", "kryo")
257         .Case("0x205", "kryo")
258         .Case("0x211", "kryo")
259         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
260         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
261         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
262         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
263         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
264         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
265         .Case("0xc00", "falkor")
266         .Case("0xc01", "saphira")
267         .Default("generic");
268   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
269     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
270     // any predictive pattern across variants and parts.
271     unsigned Variant = 0, Part = 0;
272 
273     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
274     // number, corresponding to the Variant bits in the CP15/C0 register.
275     for (auto I : Lines)
276       if (I.consume_front("CPU variant"))
277         I.ltrim("\t :").getAsInteger(0, Variant);
278 
279     // Look for the CPU part line, whose value is a 3 digit hexadecimal
280     // number, corresponding to the PartNum bits in the CP15/C0 register.
281     for (auto I : Lines)
282       if (I.consume_front("CPU part"))
283         I.ltrim("\t :").getAsInteger(0, Part);
284 
285     unsigned Exynos = (Variant << 12) | Part;
286     switch (Exynos) {
287     default:
288       // Default by falling through to Exynos M3.
289       LLVM_FALLTHROUGH;
290     case 0x1002:
291       return "exynos-m3";
292     case 0x1003:
293       return "exynos-m4";
294     }
295   }
296 
297   return "generic";
298 }
299 
300 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)301 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
302   switch (Id) {
303     case 2064:  // z900 not supported by LLVM
304     case 2066:
305     case 2084:  // z990 not supported by LLVM
306     case 2086:
307     case 2094:  // z9-109 not supported by LLVM
308     case 2096:
309       return "generic";
310     case 2097:
311     case 2098:
312       return "z10";
313     case 2817:
314     case 2818:
315       return "z196";
316     case 2827:
317     case 2828:
318       return "zEC12";
319     case 2964:
320     case 2965:
321       return HaveVectorSupport? "z13" : "zEC12";
322     case 3906:
323     case 3907:
324       return HaveVectorSupport? "z14" : "zEC12";
325     case 8561:
326     case 8562:
327       return HaveVectorSupport? "z15" : "zEC12";
328     case 3931:
329     case 3932:
330     default:
331       return HaveVectorSupport? "arch14" : "zEC12";
332   }
333 }
334 } // end anonymous namespace
335 
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)336 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
337   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
338 
339   // The "processor 0:" line comes after a fair amount of other information,
340   // including a cache breakdown, but this should be plenty.
341   SmallVector<StringRef, 32> Lines;
342   ProcCpuinfoContent.split(Lines, "\n");
343 
344   // Look for the CPU features.
345   SmallVector<StringRef, 32> CPUFeatures;
346   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
347     if (Lines[I].startswith("features")) {
348       size_t Pos = Lines[I].find(':');
349       if (Pos != StringRef::npos) {
350         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
351         break;
352       }
353     }
354 
355   // We need to check for the presence of vector support independently of
356   // the machine type, since we may only use the vector register set when
357   // supported by the kernel (and hypervisor).
358   bool HaveVectorSupport = false;
359   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
360     if (CPUFeatures[I] == "vx")
361       HaveVectorSupport = true;
362   }
363 
364   // Now check the processor machine type.
365   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
366     if (Lines[I].startswith("processor ")) {
367       size_t Pos = Lines[I].find("machine = ");
368       if (Pos != StringRef::npos) {
369         Pos += sizeof("machine = ") - 1;
370         unsigned int Id;
371         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
372           return getCPUNameFromS390Model(Id, HaveVectorSupport);
373       }
374       break;
375     }
376   }
377 
378   return "generic";
379 }
380 
getHostCPUNameForBPF()381 StringRef sys::detail::getHostCPUNameForBPF() {
382 #if !defined(__linux__) || !defined(__x86_64__)
383   return "generic";
384 #else
385   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
386       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
387     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
388       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
389       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
390       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
391       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
392       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
393       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
394       /* BPF_EXIT_INSN() */
395       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
396 
397   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
398       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
399     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
400       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
401       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
402       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
403       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
404       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
405       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
406       /* BPF_EXIT_INSN() */
407       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
408 
409   struct bpf_prog_load_attr {
410     uint32_t prog_type;
411     uint32_t insn_cnt;
412     uint64_t insns;
413     uint64_t license;
414     uint32_t log_level;
415     uint32_t log_size;
416     uint64_t log_buf;
417     uint32_t kern_version;
418     uint32_t prog_flags;
419   } attr = {};
420   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
421   attr.insn_cnt = 5;
422   attr.insns = (uint64_t)v3_insns;
423   attr.license = (uint64_t)"DUMMY";
424 
425   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
426                    sizeof(attr));
427   if (fd >= 0) {
428     close(fd);
429     return "v3";
430   }
431 
432   /* Clear the whole attr in case its content changed by syscall. */
433   memset(&attr, 0, sizeof(attr));
434   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
435   attr.insn_cnt = 5;
436   attr.insns = (uint64_t)v2_insns;
437   attr.license = (uint64_t)"DUMMY";
438   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
439   if (fd >= 0) {
440     close(fd);
441     return "v2";
442   }
443   return "v1";
444 #endif
445 }
446 
447 #if defined(__i386__) || defined(_M_IX86) || \
448     defined(__x86_64__) || defined(_M_X64)
449 
// Reports whether the CPUID instruction can be executed on this host.
//
// Only 32-bit x86 builds with a GNU-compatible compiler perform a real probe:
// they try to flip the 0x00200000 bit of EFLAGS and report CPUID as available
// only if the change sticks. Every other configuration returns true.
//
// The i386 check was copied from clang's cpuid.h (__get_cpuid_max). It was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID support.
// The cpuid.h header itself is not used here; its removal was motivated by
// PR30384. Header cpuid.h and method __get_cpuid_max are not used in llvm,
// clang, openmp or test-suite, but are used in external projects e.g.
// libstdcxx.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
484 
/// Executes CPUID for leaf \p value and stores the resulting EAX, EBX, ECX
/// and EDX values through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when CPUID was executed, true when this build has no way of
/// running it (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same EBX-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // GNU-compatible compiler on a non-x86 target, or an unknown compiler.
  return true;
#endif
}
522 
523 namespace llvm {
524 namespace sys {
525 namespace detail {
526 namespace x86 {
527 
getVendorSignature(unsigned * MaxLeaf)528 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
529   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
530   if (MaxLeaf == nullptr)
531     MaxLeaf = &EAX;
532   else
533     *MaxLeaf = 0;
534 
535   if (!isCpuIdSupported())
536     return VendorSignatures::UNKNOWN;
537 
538   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
539     return VendorSignatures::UNKNOWN;
540 
541   // "Genu ineI ntel"
542   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
543     return VendorSignatures::GENUINE_INTEL;
544 
545   // "Auth enti cAMD"
546   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
547     return VendorSignatures::AUTHENTIC_AMD;
548 
549   return VendorSignatures::UNKNOWN;
550 }
551 
552 } // namespace x86
553 } // namespace detail
554 } // namespace sys
555 } // namespace llvm
556 
557 using namespace llvm::sys::detail::x86;
558 
/// Executes CPUID for leaf \p value and sub-leaf \p subleaf and stores the
/// resulting EAX, EBX, ECX and EDX values through \p rEAX, \p rEBX, \p rECX
/// and \p rEDX.
///
/// \returns false when CPUID was executed, true when this build has no way of
/// running it (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same EBX-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // GNU-compatible compiler on a non-x86 target, or an unknown compiler.
  return true;
#endif
}
597 
// Reads extended control register 0 (XCR0), which is used to detect features
// such as AVX. The low 32 bits are stored through rEAX and the high 32 bits
// through rEDX. Returns false on success and true when this build has no way
// of reading the register.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
615 
/// Decodes the family and model numbers from the EAX value of CPUID leaf 1.
///
/// \param EAX    value to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
///               when bits 8-11 equal 0xF.
/// \param Model  receives bits 4-7, with the extended model (bits 16-19) as
///               the upper nibble when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned ExtendedFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtendedModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7

  // The extended fields only apply to families 6 and F.
  const bool IsFamilyF = *Family == 0xf;
  if (!IsFamilyF && *Family != 6)
    return;

  // Examine extended family ID if family ID is F.
  if (IsFamilyF)
    *Family += ExtendedFamily;

  // Examine extended model ID if family ID is 6 or F.
  *Model += ExtendedModel << 4;
}
628 
629 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)630 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
631                                 const unsigned *Features,
632                                 unsigned *Type, unsigned *Subtype) {
633   auto testFeature = [&](unsigned F) {
634     return (Features[F / 32] & (1U << (F % 32))) != 0;
635   };
636 
637   StringRef CPU;
638 
639   switch (Family) {
640   case 3:
641     CPU = "i386";
642     break;
643   case 4:
644     CPU = "i486";
645     break;
646   case 5:
647     if (testFeature(X86::FEATURE_MMX)) {
648       CPU = "pentium-mmx";
649       break;
650     }
651     CPU = "pentium";
652     break;
653   case 6:
654     switch (Model) {
655     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
656                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
657                // mobile processor, Intel Core 2 Extreme processor, Intel
658                // Pentium Dual-Core processor, Intel Xeon processor, model
659                // 0Fh. All processors are manufactured using the 65 nm process.
660     case 0x16: // Intel Celeron processor model 16h. All processors are
661                // manufactured using the 65 nm process
662       CPU = "core2";
663       *Type = X86::INTEL_CORE2;
664       break;
665     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
666                // 17h. All processors are manufactured using the 45 nm process.
667                //
668                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
669     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
670                // the 45 nm process.
671       CPU = "penryn";
672       *Type = X86::INTEL_CORE2;
673       break;
674     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
675                // processors are manufactured using the 45 nm process.
676     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
677                // As found in a Summer 2010 model iMac.
678     case 0x1f:
679     case 0x2e:              // Nehalem EX
680       CPU = "nehalem";
681       *Type = X86::INTEL_COREI7;
682       *Subtype = X86::INTEL_COREI7_NEHALEM;
683       break;
684     case 0x25: // Intel Core i7, laptop version.
685     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
686                // processors are manufactured using the 32 nm process.
687     case 0x2f: // Westmere EX
688       CPU = "westmere";
689       *Type = X86::INTEL_COREI7;
690       *Subtype = X86::INTEL_COREI7_WESTMERE;
691       break;
692     case 0x2a: // Intel Core i7 processor. All processors are manufactured
693                // using the 32 nm process.
694     case 0x2d:
695       CPU = "sandybridge";
696       *Type = X86::INTEL_COREI7;
697       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
698       break;
699     case 0x3a:
700     case 0x3e:              // Ivy Bridge EP
701       CPU = "ivybridge";
702       *Type = X86::INTEL_COREI7;
703       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
704       break;
705 
706     // Haswell:
707     case 0x3c:
708     case 0x3f:
709     case 0x45:
710     case 0x46:
711       CPU = "haswell";
712       *Type = X86::INTEL_COREI7;
713       *Subtype = X86::INTEL_COREI7_HASWELL;
714       break;
715 
716     // Broadwell:
717     case 0x3d:
718     case 0x47:
719     case 0x4f:
720     case 0x56:
721       CPU = "broadwell";
722       *Type = X86::INTEL_COREI7;
723       *Subtype = X86::INTEL_COREI7_BROADWELL;
724       break;
725 
726     // Skylake:
727     case 0x4e:              // Skylake mobile
728     case 0x5e:              // Skylake desktop
729     case 0x8e:              // Kaby Lake mobile
730     case 0x9e:              // Kaby Lake desktop
731     case 0xa5:              // Comet Lake-H/S
732     case 0xa6:              // Comet Lake-U
733       CPU = "skylake";
734       *Type = X86::INTEL_COREI7;
735       *Subtype = X86::INTEL_COREI7_SKYLAKE;
736       break;
737 
738     // Rocketlake:
739     case 0xa7:
740       CPU = "rocketlake";
741       *Type = X86::INTEL_COREI7;
742       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
743       break;
744 
745     // Skylake Xeon:
746     case 0x55:
747       *Type = X86::INTEL_COREI7;
748       if (testFeature(X86::FEATURE_AVX512BF16)) {
749         CPU = "cooperlake";
750         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
751       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
752         CPU = "cascadelake";
753         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
754       } else {
755         CPU = "skylake-avx512";
756         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
757       }
758       break;
759 
760     // Cannonlake:
761     case 0x66:
762       CPU = "cannonlake";
763       *Type = X86::INTEL_COREI7;
764       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
765       break;
766 
767     // Icelake:
768     case 0x7d:
769     case 0x7e:
770       CPU = "icelake-client";
771       *Type = X86::INTEL_COREI7;
772       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
773       break;
774 
775     // Icelake Xeon:
776     case 0x6a:
777     case 0x6c:
778       CPU = "icelake-server";
779       *Type = X86::INTEL_COREI7;
780       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
781       break;
782 
783     // Sapphire Rapids:
784     case 0x8f:
785       CPU = "sapphirerapids";
786       *Type = X86::INTEL_COREI7;
787       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
788       break;
789 
790     case 0x1c: // Most 45 nm Intel Atom processors
791     case 0x26: // 45 nm Atom Lincroft
792     case 0x27: // 32 nm Atom Medfield
793     case 0x35: // 32 nm Atom Midview
794     case 0x36: // 32 nm Atom Midview
795       CPU = "bonnell";
796       *Type = X86::INTEL_BONNELL;
797       break;
798 
799     // Atom Silvermont codes from the Intel software optimization guide.
800     case 0x37:
801     case 0x4a:
802     case 0x4d:
803     case 0x5a:
804     case 0x5d:
805     case 0x4c: // really airmont
806       CPU = "silvermont";
807       *Type = X86::INTEL_SILVERMONT;
808       break;
809     // Goldmont:
810     case 0x5c: // Apollo Lake
811     case 0x5f: // Denverton
812       CPU = "goldmont";
813       *Type = X86::INTEL_GOLDMONT;
814       break;
815     case 0x7a:
816       CPU = "goldmont-plus";
817       *Type = X86::INTEL_GOLDMONT_PLUS;
818       break;
819     case 0x86:
820       CPU = "tremont";
821       *Type = X86::INTEL_TREMONT;
822       break;
823 
824     // Xeon Phi (Knights Landing + Knights Mill):
825     case 0x57:
826       CPU = "knl";
827       *Type = X86::INTEL_KNL;
828       break;
829     case 0x85:
830       CPU = "knm";
831       *Type = X86::INTEL_KNM;
832       break;
833 
834     default: // Unknown family 6 CPU, try to guess.
835       // Don't both with Type/Subtype here, they aren't used by the caller.
836       // They're used above to keep the code in sync with compiler-rt.
837       // TODO detect tigerlake host from model
838       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
839         CPU = "tigerlake";
840       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
841         CPU = "icelake-client";
842       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
843         CPU = "cannonlake";
844       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
845         CPU = "cooperlake";
846       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
847         CPU = "cascadelake";
848       } else if (testFeature(X86::FEATURE_AVX512VL)) {
849         CPU = "skylake-avx512";
850       } else if (testFeature(X86::FEATURE_AVX512ER)) {
851         CPU = "knl";
852       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
853         if (testFeature(X86::FEATURE_SHA))
854           CPU = "goldmont";
855         else
856           CPU = "skylake";
857       } else if (testFeature(X86::FEATURE_ADX)) {
858         CPU = "broadwell";
859       } else if (testFeature(X86::FEATURE_AVX2)) {
860         CPU = "haswell";
861       } else if (testFeature(X86::FEATURE_AVX)) {
862         CPU = "sandybridge";
863       } else if (testFeature(X86::FEATURE_SSE4_2)) {
864         if (testFeature(X86::FEATURE_MOVBE))
865           CPU = "silvermont";
866         else
867           CPU = "nehalem";
868       } else if (testFeature(X86::FEATURE_SSE4_1)) {
869         CPU = "penryn";
870       } else if (testFeature(X86::FEATURE_SSSE3)) {
871         if (testFeature(X86::FEATURE_MOVBE))
872           CPU = "bonnell";
873         else
874           CPU = "core2";
875       } else if (testFeature(X86::FEATURE_64BIT)) {
876         CPU = "core2";
877       } else if (testFeature(X86::FEATURE_SSE3)) {
878         CPU = "yonah";
879       } else if (testFeature(X86::FEATURE_SSE2)) {
880         CPU = "pentium-m";
881       } else if (testFeature(X86::FEATURE_SSE)) {
882         CPU = "pentium3";
883       } else if (testFeature(X86::FEATURE_MMX)) {
884         CPU = "pentium2";
885       } else {
886         CPU = "pentiumpro";
887       }
888       break;
889     }
890     break;
891   case 15: {
892     if (testFeature(X86::FEATURE_64BIT)) {
893       CPU = "nocona";
894       break;
895     }
896     if (testFeature(X86::FEATURE_SSE3)) {
897       CPU = "prescott";
898       break;
899     }
900     CPU = "pentium4";
901     break;
902   }
903   default:
904     break; // Unknown.
905   }
906 
907   return CPU;
908 }
909 
910 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)911 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
912                               const unsigned *Features,
913                               unsigned *Type, unsigned *Subtype) {
914   auto testFeature = [&](unsigned F) {
915     return (Features[F / 32] & (1U << (F % 32))) != 0;
916   };
917 
918   StringRef CPU;
919 
920   switch (Family) {
921   case 4:
922     CPU = "i486";
923     break;
924   case 5:
925     CPU = "pentium";
926     switch (Model) {
927     case 6:
928     case 7:
929       CPU = "k6";
930       break;
931     case 8:
932       CPU = "k6-2";
933       break;
934     case 9:
935     case 13:
936       CPU = "k6-3";
937       break;
938     case 10:
939       CPU = "geode";
940       break;
941     }
942     break;
943   case 6:
944     if (testFeature(X86::FEATURE_SSE)) {
945       CPU = "athlon-xp";
946       break;
947     }
948     CPU = "athlon";
949     break;
950   case 15:
951     if (testFeature(X86::FEATURE_SSE3)) {
952       CPU = "k8-sse3";
953       break;
954     }
955     CPU = "k8";
956     break;
957   case 16:
958     CPU = "amdfam10";
959     *Type = X86::AMDFAM10H; // "amdfam10"
960     switch (Model) {
961     case 2:
962       *Subtype = X86::AMDFAM10H_BARCELONA;
963       break;
964     case 4:
965       *Subtype = X86::AMDFAM10H_SHANGHAI;
966       break;
967     case 8:
968       *Subtype = X86::AMDFAM10H_ISTANBUL;
969       break;
970     }
971     break;
972   case 20:
973     CPU = "btver1";
974     *Type = X86::AMD_BTVER1;
975     break;
976   case 21:
977     CPU = "bdver1";
978     *Type = X86::AMDFAM15H;
979     if (Model >= 0x60 && Model <= 0x7f) {
980       CPU = "bdver4";
981       *Subtype = X86::AMDFAM15H_BDVER4;
982       break; // 60h-7Fh: Excavator
983     }
984     if (Model >= 0x30 && Model <= 0x3f) {
985       CPU = "bdver3";
986       *Subtype = X86::AMDFAM15H_BDVER3;
987       break; // 30h-3Fh: Steamroller
988     }
989     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
990       CPU = "bdver2";
991       *Subtype = X86::AMDFAM15H_BDVER2;
992       break; // 02h, 10h-1Fh: Piledriver
993     }
994     if (Model <= 0x0f) {
995       *Subtype = X86::AMDFAM15H_BDVER1;
996       break; // 00h-0Fh: Bulldozer
997     }
998     break;
999   case 22:
1000     CPU = "btver2";
1001     *Type = X86::AMD_BTVER2;
1002     break;
1003   case 23:
1004     CPU = "znver1";
1005     *Type = X86::AMDFAM17H;
1006     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1007       CPU = "znver2";
1008       *Subtype = X86::AMDFAM17H_ZNVER2;
1009       break; // 30h-3fh, 71h: Zen2
1010     }
1011     if (Model <= 0x0f) {
1012       *Subtype = X86::AMDFAM17H_ZNVER1;
1013       break; // 00h-0Fh: Zen1
1014     }
1015     break;
1016   case 25:
1017     CPU = "znver3";
1018     *Type = X86::AMDFAM19H;
1019     if (Model <= 0x0f) {
1020       *Subtype = X86::AMDFAM19H_ZNVER3;
1021       break; // 00h-0Fh: Zen3
1022     }
1023     break;
1024   default:
1025     break; // Unknown AMD CPU.
1026   }
1027 
1028   return CPU;
1029 }
1030 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1031 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1032                                  unsigned *Features) {
1033   unsigned EAX, EBX;
1034 
1035   auto setFeature = [&](unsigned F) {
1036     Features[F / 32] |= 1U << (F % 32);
1037   };
1038 
1039   if ((EDX >> 15) & 1)
1040     setFeature(X86::FEATURE_CMOV);
1041   if ((EDX >> 23) & 1)
1042     setFeature(X86::FEATURE_MMX);
1043   if ((EDX >> 25) & 1)
1044     setFeature(X86::FEATURE_SSE);
1045   if ((EDX >> 26) & 1)
1046     setFeature(X86::FEATURE_SSE2);
1047 
1048   if ((ECX >> 0) & 1)
1049     setFeature(X86::FEATURE_SSE3);
1050   if ((ECX >> 1) & 1)
1051     setFeature(X86::FEATURE_PCLMUL);
1052   if ((ECX >> 9) & 1)
1053     setFeature(X86::FEATURE_SSSE3);
1054   if ((ECX >> 12) & 1)
1055     setFeature(X86::FEATURE_FMA);
1056   if ((ECX >> 19) & 1)
1057     setFeature(X86::FEATURE_SSE4_1);
1058   if ((ECX >> 20) & 1)
1059     setFeature(X86::FEATURE_SSE4_2);
1060   if ((ECX >> 23) & 1)
1061     setFeature(X86::FEATURE_POPCNT);
1062   if ((ECX >> 25) & 1)
1063     setFeature(X86::FEATURE_AES);
1064 
1065   if ((ECX >> 22) & 1)
1066     setFeature(X86::FEATURE_MOVBE);
1067 
1068   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1069   // indicates that the AVX registers will be saved and restored on context
1070   // switch, then we have full AVX support.
1071   const unsigned AVXBits = (1 << 27) | (1 << 28);
1072   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1073                 ((EAX & 0x6) == 0x6);
1074 #if defined(__APPLE__)
1075   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1076   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1077   // set right now.
1078   bool HasAVX512Save = true;
1079 #else
1080   // AVX512 requires additional context to be saved by the OS.
1081   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1082 #endif
1083 
1084   if (HasAVX)
1085     setFeature(X86::FEATURE_AVX);
1086 
1087   bool HasLeaf7 =
1088       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1089 
1090   if (HasLeaf7 && ((EBX >> 3) & 1))
1091     setFeature(X86::FEATURE_BMI);
1092   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1093     setFeature(X86::FEATURE_AVX2);
1094   if (HasLeaf7 && ((EBX >> 8) & 1))
1095     setFeature(X86::FEATURE_BMI2);
1096   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1097     setFeature(X86::FEATURE_AVX512F);
1098   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1099     setFeature(X86::FEATURE_AVX512DQ);
1100   if (HasLeaf7 && ((EBX >> 19) & 1))
1101     setFeature(X86::FEATURE_ADX);
1102   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1103     setFeature(X86::FEATURE_AVX512IFMA);
1104   if (HasLeaf7 && ((EBX >> 23) & 1))
1105     setFeature(X86::FEATURE_CLFLUSHOPT);
1106   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1107     setFeature(X86::FEATURE_AVX512PF);
1108   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1109     setFeature(X86::FEATURE_AVX512ER);
1110   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1111     setFeature(X86::FEATURE_AVX512CD);
1112   if (HasLeaf7 && ((EBX >> 29) & 1))
1113     setFeature(X86::FEATURE_SHA);
1114   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1115     setFeature(X86::FEATURE_AVX512BW);
1116   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1117     setFeature(X86::FEATURE_AVX512VL);
1118 
1119   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1120     setFeature(X86::FEATURE_AVX512VBMI);
1121   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1122     setFeature(X86::FEATURE_AVX512VBMI2);
1123   if (HasLeaf7 && ((ECX >> 8) & 1))
1124     setFeature(X86::FEATURE_GFNI);
1125   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1126     setFeature(X86::FEATURE_VPCLMULQDQ);
1127   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1128     setFeature(X86::FEATURE_AVX512VNNI);
1129   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1130     setFeature(X86::FEATURE_AVX512BITALG);
1131   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1132     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1133 
1134   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1135     setFeature(X86::FEATURE_AVX5124VNNIW);
1136   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1137     setFeature(X86::FEATURE_AVX5124FMAPS);
1138   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1139     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1140 
1141   bool HasLeaf7Subleaf1 =
1142       MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1143   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1144     setFeature(X86::FEATURE_AVX512BF16);
1145 
1146   unsigned MaxExtLevel;
1147   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1148 
1149   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1150                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1151   if (HasExtLeaf1 && ((ECX >> 6) & 1))
1152     setFeature(X86::FEATURE_SSE4_A);
1153   if (HasExtLeaf1 && ((ECX >> 11) & 1))
1154     setFeature(X86::FEATURE_XOP);
1155   if (HasExtLeaf1 && ((ECX >> 16) & 1))
1156     setFeature(X86::FEATURE_FMA4);
1157 
1158   if (HasExtLeaf1 && ((EDX >> 29) & 1))
1159     setFeature(X86::FEATURE_64BIT);
1160 }
1161 
getHostCPUName()1162 StringRef sys::getHostCPUName() {
1163   unsigned MaxLeaf = 0;
1164   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1165   if (Vendor == VendorSignatures::UNKNOWN)
1166     return "generic";
1167 
1168   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1169   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1170 
1171   unsigned Family = 0, Model = 0;
1172   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1173   detectX86FamilyModel(EAX, &Family, &Model);
1174   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1175 
1176   // These aren't consumed in this file, but we try to keep some source code the
1177   // same or similar to compiler-rt.
1178   unsigned Type = 0;
1179   unsigned Subtype = 0;
1180 
1181   StringRef CPU;
1182 
1183   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1184     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1185                                           &Subtype);
1186   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1187     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1188                                         &Subtype);
1189   }
1190 
1191   if (!CPU.empty())
1192     return CPU;
1193 
1194   return "generic";
1195 }
1196 
1197 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1198 StringRef sys::getHostCPUName() {
1199   host_basic_info_data_t hostInfo;
1200   mach_msg_type_number_t infoCount;
1201 
1202   infoCount = HOST_BASIC_INFO_COUNT;
1203   mach_port_t hostPort = mach_host_self();
1204   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1205             &infoCount);
1206   mach_port_deallocate(mach_task_self(), hostPort);
1207 
1208   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1209     return "generic";
1210 
1211   switch (hostInfo.cpu_subtype) {
1212   case CPU_SUBTYPE_POWERPC_601:
1213     return "601";
1214   case CPU_SUBTYPE_POWERPC_602:
1215     return "602";
1216   case CPU_SUBTYPE_POWERPC_603:
1217     return "603";
1218   case CPU_SUBTYPE_POWERPC_603e:
1219     return "603e";
1220   case CPU_SUBTYPE_POWERPC_603ev:
1221     return "603ev";
1222   case CPU_SUBTYPE_POWERPC_604:
1223     return "604";
1224   case CPU_SUBTYPE_POWERPC_604e:
1225     return "604e";
1226   case CPU_SUBTYPE_POWERPC_620:
1227     return "620";
1228   case CPU_SUBTYPE_POWERPC_750:
1229     return "750";
1230   case CPU_SUBTYPE_POWERPC_7400:
1231     return "7400";
1232   case CPU_SUBTYPE_POWERPC_7450:
1233     return "7450";
1234   case CPU_SUBTYPE_POWERPC_970:
1235     return "970";
1236   default:;
1237   }
1238 
1239   return "generic";
1240 }
1241 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
getHostCPUName()1242 StringRef sys::getHostCPUName() {
1243   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1244   StringRef Content = P ? P->getBuffer() : "";
1245   return detail::getHostCPUNameForPowerPC(Content);
1246 }
1247 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1248 StringRef sys::getHostCPUName() {
1249   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1250   StringRef Content = P ? P->getBuffer() : "";
1251   return detail::getHostCPUNameForARM(Content);
1252 }
1253 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1254 StringRef sys::getHostCPUName() {
1255   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1256   StringRef Content = P ? P->getBuffer() : "";
1257   return detail::getHostCPUNameForS390x(Content);
1258 }
1259 #elif defined(__MVS__)
getHostCPUName()1260 StringRef sys::getHostCPUName() {
1261   // Get pointer to Communications Vector Table (CVT).
1262   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1263   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1264   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1265   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1266   // of address.
1267   int ReadValue = *StartToCVTOffset;
1268   // Explicitly clear the high order bit.
1269   ReadValue = (ReadValue & 0x7FFFFFFF);
1270   char *CVT = reinterpret_cast<char *>(ReadValue);
1271   // The model number is located in the CVT prefix at offset -6 and stored as
1272   // signless packed decimal.
1273   uint16_t Id = *(uint16_t *)&CVT[-6];
1274   // Convert number to integer.
1275   Id = decodePackedBCD<uint16_t>(Id, false);
1276   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1277   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1278   // extension can only be used if bit CVTVEF is on.
1279   bool HaveVectorSupport = CVT[244] & 0x80;
1280   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1281 }
1282 #elif defined(__APPLE__) && defined(__aarch64__)
getHostCPUName()1283 StringRef sys::getHostCPUName() {
1284   return "cyclone";
1285 }
1286 #elif defined(__APPLE__) && defined(__arm__)
getHostCPUName()1287 StringRef sys::getHostCPUName() {
1288   host_basic_info_data_t hostInfo;
1289   mach_msg_type_number_t infoCount;
1290 
1291   infoCount = HOST_BASIC_INFO_COUNT;
1292   mach_port_t hostPort = mach_host_self();
1293   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1294             &infoCount);
1295   mach_port_deallocate(mach_task_self(), hostPort);
1296 
1297   if (hostInfo.cpu_type != CPU_TYPE_ARM) {
1298     assert(false && "CPUType not equal to ARM should not be possible on ARM");
1299     return "generic";
1300   }
1301   switch (hostInfo.cpu_subtype) {
1302     case CPU_SUBTYPE_ARM_V7S:
1303       return "swift";
1304     default:;
1305     }
1306 
1307   return "generic";
1308 }
1309 #elif defined(_AIX)
getHostCPUName()1310 StringRef sys::getHostCPUName() {
1311   switch (_system_configuration.implementation) {
1312   case POWER_4:
1313     if (_system_configuration.version == PV_4_3)
1314       return "970";
1315     return "pwr4";
1316   case POWER_5:
1317     if (_system_configuration.version == PV_5)
1318       return "pwr5";
1319     return "pwr5x";
1320   case POWER_6:
1321     if (_system_configuration.version == PV_6_Compat)
1322       return "pwr6";
1323     return "pwr6x";
1324   case POWER_7:
1325     return "pwr7";
1326   case POWER_8:
1327     return "pwr8";
1328   case POWER_9:
1329     return "pwr9";
1330 // TODO: simplify this once the macro is available in all OS levels.
1331 #ifdef POWER_10
1332   case POWER_10:
1333 #else
1334   case 0x40000:
1335 #endif
1336     return "pwr10";
1337   default:
1338     return "generic";
1339   }
1340 }
1341 #else
getHostCPUName()1342 StringRef sys::getHostCPUName() { return "generic"; }
1343 namespace llvm {
1344 namespace sys {
1345 namespace detail {
1346 namespace x86 {
1347 
getVendorSignature(unsigned * MaxLeaf)1348 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1349   return VendorSignatures::UNKNOWN;
1350 }
1351 
1352 } // namespace x86
1353 } // namespace detail
1354 } // namespace sys
1355 } // namespace llvm
1356 #endif
1357 
1358 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1359 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1360 // using the number of unique physical/core id pairs. The following
1361 // implementation reads the /proc/cpuinfo format on an x86_64 system.
computeHostNumPhysicalCores()1362 int computeHostNumPhysicalCores() {
1363   // Enabled represents the number of physical id/core id pairs with at least
1364   // one processor id enabled by the CPU affinity mask.
1365   cpu_set_t Affinity, Enabled;
1366   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1367     return -1;
1368   CPU_ZERO(&Enabled);
1369 
1370   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1371   // mmapped because it appears to have 0 size.
1372   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1373       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1374   if (std::error_code EC = Text.getError()) {
1375     llvm::errs() << "Can't read "
1376                  << "/proc/cpuinfo: " << EC.message() << "\n";
1377     return -1;
1378   }
1379   SmallVector<StringRef, 8> strs;
1380   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1381                              /*KeepEmpty=*/false);
1382   int CurProcessor = -1;
1383   int CurPhysicalId = -1;
1384   int CurSiblings = -1;
1385   int CurCoreId = -1;
1386   for (StringRef Line : strs) {
1387     std::pair<StringRef, StringRef> Data = Line.split(':');
1388     auto Name = Data.first.trim();
1389     auto Val = Data.second.trim();
1390     // These fields are available if the kernel is configured with CONFIG_SMP.
1391     if (Name == "processor")
1392       Val.getAsInteger(10, CurProcessor);
1393     else if (Name == "physical id")
1394       Val.getAsInteger(10, CurPhysicalId);
1395     else if (Name == "siblings")
1396       Val.getAsInteger(10, CurSiblings);
1397     else if (Name == "core id") {
1398       Val.getAsInteger(10, CurCoreId);
1399       // The processor id corresponds to an index into cpu_set_t.
1400       if (CPU_ISSET(CurProcessor, &Affinity))
1401         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1402     }
1403   }
1404   return CPU_COUNT(&Enabled);
1405 }
1406 #elif defined(__linux__) && defined(__powerpc__)
// Linux/PowerPC: report the number of CPUs in this process's affinity mask.
//
// Returns the CPU count, or -1 if the affinity mask cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  const int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (!DynAffinity)
    return -1;
  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);

  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S: plain CPU_COUNT
  // only inspects sizeof(cpu_set_t) bytes and would miss the CPUs beyond
  // that.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1425 #elif defined(__linux__) && defined(__s390x__)
// Linux/s390x: report the value of sysconf(_SC_NPROCESSORS_ONLN), narrowed
// to int.
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1427 #elif defined(__APPLE__)
1428 #include <sys/param.h>
1429 #include <sys/sysctl.h>
1430 
// Gets the number of *physical cores* on the machine.
//
// Queries the "hw.physicalcpu" sysctl first and falls back to
// CTL_HW/HW_AVAILCPU when that query fails or reports fewer than one CPU.
// Returns -1 if neither query yields a count of at least one.
int computeHostNumPhysicalCores() {
  // Start from zero so a failed query is never mistaken for a valid count.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // The first call may have modified both values; reset them before the
    // fallback query.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return static_cast<int>(count);
}
1446 #elif defined(__MVS__)
// z/OS: read the number of online standard CPs from the Common System Data
// Area (CSD), reached by following PSA -> CVT -> CSD.
int computeHostNumPhysicalCores() {
  // Byte offset of the pointer to the Communications Vector Table (CVT) in
  // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
  // will be zero-extended to uintptr_t.
  const int FLCCVT = 16;
  // Byte offset of the pointer to the Common System Data Area (CSD) in the
  // CVT. The table entry is a 31-bit pointer and will be zero-extended to
  // uintptr_t.
  const int CVTCSD = 660;
  // Byte offset to the number of live CPs in the LPAR, stored as a signed
  // 32-bit value in the table.
  const int CSD_NUMBER_ONLINE_STANDARD_CPS = 264;

  // Reads the 32-bit table entry at Base[Offset] and widens it to a pointer.
  auto loadTablePointer = [](char *Base, int Offset) -> char * {
    const unsigned int Entry = reinterpret_cast<unsigned int &>(Base[Offset]);
    return reinterpret_cast<char *>(static_cast<uintptr_t>(Entry));
  };

  // The PSA is addressed from address zero.
  char *PSA = 0;
  char *CVT = loadTablePointer(PSA, FLCCVT);
  char *CSD = loadTablePointer(CVT, CVTCSD);
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1468 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1469 // Defined in llvm/lib/Support/Windows/Threading.inc
1470 int computeHostNumPhysicalCores();
1471 #else
// No physical-core detection exists for the remaining systems; -1 tells the
// caller that the count is unknown.
static int computeHostNumPhysicalCores() {
  return -1;
}
1474 #endif
1475 
getHostNumPhysicalCores()1476 int sys::getHostNumPhysicalCores() {
1477   static int NumCores = computeHostNumPhysicalCores();
1478   return NumCores;
1479 }
1480 
1481 #if defined(__i386__) || defined(_M_IX86) || \
1482     defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1483 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1484   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1485   unsigned MaxLevel;
1486 
1487   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1488     return false;
1489 
1490   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1491 
1492   Features["cx8"]    = (EDX >>  8) & 1;
1493   Features["cmov"]   = (EDX >> 15) & 1;
1494   Features["mmx"]    = (EDX >> 23) & 1;
1495   Features["fxsr"]   = (EDX >> 24) & 1;
1496   Features["sse"]    = (EDX >> 25) & 1;
1497   Features["sse2"]   = (EDX >> 26) & 1;
1498 
1499   Features["sse3"]   = (ECX >>  0) & 1;
1500   Features["pclmul"] = (ECX >>  1) & 1;
1501   Features["ssse3"]  = (ECX >>  9) & 1;
1502   Features["cx16"]   = (ECX >> 13) & 1;
1503   Features["sse4.1"] = (ECX >> 19) & 1;
1504   Features["sse4.2"] = (ECX >> 20) & 1;
1505   Features["movbe"]  = (ECX >> 22) & 1;
1506   Features["popcnt"] = (ECX >> 23) & 1;
1507   Features["aes"]    = (ECX >> 25) & 1;
1508   Features["rdrnd"]  = (ECX >> 30) & 1;
1509 
1510   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1511   // indicates that the AVX registers will be saved and restored on context
1512   // switch, then we have full AVX support.
1513   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1514   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1515 #if defined(__APPLE__)
1516   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1517   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1518   // set right now.
1519   bool HasAVX512Save = true;
1520 #else
1521   // AVX512 requires additional context to be saved by the OS.
1522   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1523 #endif
1524   // AMX requires additional context to be saved by the OS.
1525   const unsigned AMXBits = (1 << 17) | (1 << 18);
1526   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1527 
1528   Features["avx"]   = HasAVXSave;
1529   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1530   // Only enable XSAVE if OS has enabled support for saving YMM state.
1531   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1532   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1533 
1534   unsigned MaxExtLevel;
1535   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1536 
1537   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1538                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1539   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1540   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1541   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1542   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1543   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1544   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1545   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1546   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1547   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1548 
1549   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1550 
1551   // Miscellaneous memory related features, detected by
1552   // using the 0x80000008 leaf of the CPUID instruction
1553   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1554                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1555   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1556   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1557 
1558   bool HasLeaf7 =
1559       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1560 
1561   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1562   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1563   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1564   // AVX2 is only supported if we have the OS save support from AVX.
1565   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1566   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1567   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1568   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1569   // AVX512 is only supported if the OS supports the context save for it.
1570   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1571   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1572   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1573   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1574   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1575   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1576   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1577   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1578   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1579   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1580   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1581   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1582   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1583 
1584   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1585   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1586   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1587   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1588   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1589   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1590   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1591   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1592   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1593   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1594   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1595   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1596   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1597   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1598   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1599   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1600   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1601   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1602 
1603   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1604   Features["avx512vp2intersect"] =
1605       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1606   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1607   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
1608   // There are two CPUID leafs which information associated with the pconfig
1609   // instruction:
1610   // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1611   // bit of EDX), while the EAX=0x1b leaf returns information on the
1612   // availability of specific pconfig leafs.
1613   // The target feature here only refers to the the first of these two.
1614   // Users might need to check for the availability of specific pconfig
1615   // leaves using cpuid, since that information is ignored while
1616   // detecting features using the "-march=native" flag.
1617   // For more info, see X86 ISA docs.
1618   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1619   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1620   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1621   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1622   bool HasLeaf7Subleaf1 =
1623       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1624   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1625   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1626   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1627 
1628   bool HasLeafD = MaxLevel >= 0xd &&
1629                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1630 
1631   // Only enable XSAVE if OS has enabled support for saving YMM state.
1632   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1633   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1634   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1635 
1636   bool HasLeaf14 = MaxLevel >= 0x14 &&
1637                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1638 
1639   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1640 
1641   bool HasLeaf19 =
1642       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1643   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1644 
1645   return true;
1646 }
1647 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1648 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1649   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1650   if (!P)
1651     return false;
1652 
1653   SmallVector<StringRef, 32> Lines;
1654   P->getBuffer().split(Lines, "\n");
1655 
1656   SmallVector<StringRef, 32> CPUFeatures;
1657 
1658   // Look for the CPU features.
1659   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1660     if (Lines[I].startswith("Features")) {
1661       Lines[I].split(CPUFeatures, ' ');
1662       break;
1663     }
1664 
1665 #if defined(__aarch64__)
1666   // Keep track of which crypto features we have seen
1667   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1668   uint32_t crypto = 0;
1669 #endif
1670 
1671   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1672     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1673 #if defined(__aarch64__)
1674                                    .Case("asimd", "neon")
1675                                    .Case("fp", "fp-armv8")
1676                                    .Case("crc32", "crc")
1677 #else
1678                                    .Case("half", "fp16")
1679                                    .Case("neon", "neon")
1680                                    .Case("vfpv3", "vfp3")
1681                                    .Case("vfpv3d16", "d16")
1682                                    .Case("vfpv4", "vfp4")
1683                                    .Case("idiva", "hwdiv-arm")
1684                                    .Case("idivt", "hwdiv")
1685 #endif
1686                                    .Default("");
1687 
1688 #if defined(__aarch64__)
1689     // We need to check crypto separately since we need all of the crypto
1690     // extensions to enable the subtarget feature
1691     if (CPUFeatures[I] == "aes")
1692       crypto |= CAP_AES;
1693     else if (CPUFeatures[I] == "pmull")
1694       crypto |= CAP_PMULL;
1695     else if (CPUFeatures[I] == "sha1")
1696       crypto |= CAP_SHA1;
1697     else if (CPUFeatures[I] == "sha2")
1698       crypto |= CAP_SHA2;
1699 #endif
1700 
1701     if (LLVMFeatureStr != "")
1702       Features[LLVMFeatureStr] = true;
1703   }
1704 
1705 #if defined(__aarch64__)
1706   // If we have all crypto bits we can add the feature
1707   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1708     Features["crypto"] = true;
1709 #endif
1710 
1711   return true;
1712 }
1713 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1714 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1715   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1716     Features["neon"] = true;
1717   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1718     Features["crc"] = true;
1719   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1720     Features["crypto"] = true;
1721 
1722   return true;
1723 }
1724 #else
getHostCPUFeatures(StringMap<bool> & Features)1725 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1726 #endif
1727 
getProcessTriple()1728 std::string sys::getProcessTriple() {
1729   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1730   Triple PT(Triple::normalize(TargetTripleString));
1731 
1732   if (sizeof(void *) == 8 && PT.isArch32Bit())
1733     PT = PT.get64BitArchVariant();
1734   if (sizeof(void *) == 4 && PT.isArch64Bit())
1735     PT = PT.get32BitArchVariant();
1736 
1737   return PT.str();
1738 }
1739