1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the operating system Host detection.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TargetParser/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Config/llvm-config.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include "llvm/TargetParser/X86TargetParser.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 #if defined(__sun__) && defined(__svr4__)
51 #include <kstat.h>
52 #endif
53 
54 #define DEBUG_TYPE "host-detection"
55 
56 //===----------------------------------------------------------------------===//
57 //
58 //  Implementations of the CPU detection routines
59 //
60 //===----------------------------------------------------------------------===//
61 
62 using namespace llvm;
63 
64 static std::unique_ptr<llvm::MemoryBuffer>
getProcCpuinfoContent()65     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68   if (std::error_code EC = Text.getError()) {
69     llvm::errs() << "Can't read "
70                  << "/proc/cpuinfo: " << EC.message() << "\n";
71     return nullptr;
72   }
73   return std::move(*Text);
74 }
75 
getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78   // and so we must use an operating-system interface to determine the current
79   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80   const char *generic = "generic";
81 
82   // The cpu line is second (after the 'processor: 0' line), so if this
83   // buffer is too small then something has changed (or is wrong).
84   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86 
87   StringRef::const_iterator CIP = CPUInfoStart;
88 
89   StringRef::const_iterator CPUStart = nullptr;
90   size_t CPULen = 0;
91 
92   // We need to find the first line which starts with cpu, spaces, and a colon.
93   // After the colon, there may be some additional spaces and then the cpu type.
94   while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95     if (CIP < CPUInfoEnd && *CIP == '\n')
96       ++CIP;
97 
98     if (CIP < CPUInfoEnd && *CIP == 'c') {
99       ++CIP;
100       if (CIP < CPUInfoEnd && *CIP == 'p') {
101         ++CIP;
102         if (CIP < CPUInfoEnd && *CIP == 'u') {
103           ++CIP;
104           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105             ++CIP;
106 
107           if (CIP < CPUInfoEnd && *CIP == ':') {
108             ++CIP;
109             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110               ++CIP;
111 
112             if (CIP < CPUInfoEnd) {
113               CPUStart = CIP;
114               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                           *CIP != ',' && *CIP != '\n'))
116                 ++CIP;
117               CPULen = CIP - CPUStart;
118             }
119           }
120         }
121       }
122     }
123 
124     if (CPUStart == nullptr)
125       while (CIP < CPUInfoEnd && *CIP != '\n')
126         ++CIP;
127   }
128 
129   if (CPUStart == nullptr)
130     return generic;
131 
132   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133       .Case("604e", "604e")
134       .Case("604", "604")
135       .Case("7400", "7400")
136       .Case("7410", "7400")
137       .Case("7447", "7400")
138       .Case("7455", "7450")
139       .Case("G4", "g4")
140       .Case("POWER4", "970")
141       .Case("PPC970FX", "970")
142       .Case("PPC970MP", "970")
143       .Case("G5", "g5")
144       .Case("POWER5", "g5")
145       .Case("A2", "a2")
146       .Case("POWER6", "pwr6")
147       .Case("POWER7", "pwr7")
148       .Case("POWER8", "pwr8")
149       .Case("POWER8E", "pwr8")
150       .Case("POWER8NVL", "pwr8")
151       .Case("POWER9", "pwr9")
152       .Case("POWER10", "pwr10")
153       // FIXME: If we get a simulator or machine with the capabilities of
154       // mcpu=future, we should revisit this and add the name reported by the
155       // simulator/machine.
156       .Default(generic);
157 }
158 
getHostCPUNameForARM(StringRef ProcCpuinfoContent)159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160   // The cpuid register on arm is not accessible from user space. On Linux,
161   // it is exposed through the /proc/cpuinfo file.
162 
163   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164   // in all cases.
165   SmallVector<StringRef, 32> Lines;
166   ProcCpuinfoContent.split(Lines, "\n");
167 
168   // Look for the CPU implementer line.
169   StringRef Implementer;
170   StringRef Hardware;
171   StringRef Part;
172   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173     if (Lines[I].starts_with("CPU implementer"))
174       Implementer = Lines[I].substr(15).ltrim("\t :");
175     if (Lines[I].starts_with("Hardware"))
176       Hardware = Lines[I].substr(8).ltrim("\t :");
177     if (Lines[I].starts_with("CPU part"))
178       Part = Lines[I].substr(8).ltrim("\t :");
179   }
180 
181   if (Implementer == "0x41") { // ARM Ltd.
182     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184     if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
185       return "cortex-a53";
186 
187 
188     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189     // values correspond to the "Part number" in the CP15/c0 register. The
190     // contents are specified in the various processor manuals.
191     // This corresponds to the Main ID Register in Technical Reference Manuals.
192     // and is used in programs like sys-utils
193     return StringSwitch<const char *>(Part)
194         .Case("0x926", "arm926ej-s")
195         .Case("0xb02", "mpcore")
196         .Case("0xb36", "arm1136j-s")
197         .Case("0xb56", "arm1156t2-s")
198         .Case("0xb76", "arm1176jz-s")
199         .Case("0xc08", "cortex-a8")
200         .Case("0xc09", "cortex-a9")
201         .Case("0xc0f", "cortex-a15")
202         .Case("0xc20", "cortex-m0")
203         .Case("0xc23", "cortex-m3")
204         .Case("0xc24", "cortex-m4")
205         .Case("0xd24", "cortex-m52")
206         .Case("0xd22", "cortex-m55")
207         .Case("0xd02", "cortex-a34")
208         .Case("0xd04", "cortex-a35")
209         .Case("0xd03", "cortex-a53")
210         .Case("0xd05", "cortex-a55")
211         .Case("0xd46", "cortex-a510")
212         .Case("0xd80", "cortex-a520")
213         .Case("0xd07", "cortex-a57")
214         .Case("0xd08", "cortex-a72")
215         .Case("0xd09", "cortex-a73")
216         .Case("0xd0a", "cortex-a75")
217         .Case("0xd0b", "cortex-a76")
218         .Case("0xd0d", "cortex-a77")
219         .Case("0xd41", "cortex-a78")
220         .Case("0xd47", "cortex-a710")
221         .Case("0xd4d", "cortex-a715")
222         .Case("0xd81", "cortex-a720")
223         .Case("0xd44", "cortex-x1")
224         .Case("0xd4c", "cortex-x1c")
225         .Case("0xd48", "cortex-x2")
226         .Case("0xd4e", "cortex-x3")
227         .Case("0xd82", "cortex-x4")
228         .Case("0xd0c", "neoverse-n1")
229         .Case("0xd49", "neoverse-n2")
230         .Case("0xd40", "neoverse-v1")
231         .Case("0xd4f", "neoverse-v2")
232         .Default("generic");
233   }
234 
235   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
236     return StringSwitch<const char *>(Part)
237       .Case("0x516", "thunderx2t99")
238       .Case("0x0516", "thunderx2t99")
239       .Case("0xaf", "thunderx2t99")
240       .Case("0x0af", "thunderx2t99")
241       .Case("0xa1", "thunderxt88")
242       .Case("0x0a1", "thunderxt88")
243       .Default("generic");
244   }
245 
246   if (Implementer == "0x46") { // Fujitsu Ltd.
247     return StringSwitch<const char *>(Part)
248       .Case("0x001", "a64fx")
249       .Default("generic");
250   }
251 
252   if (Implementer == "0x4e") { // NVIDIA Corporation
253     return StringSwitch<const char *>(Part)
254         .Case("0x004", "carmel")
255         .Default("generic");
256   }
257 
258   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
259     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
260     // values correspond to the "Part number" in the CP15/c0 register. The
261     // contents are specified in the various processor manuals.
262     return StringSwitch<const char *>(Part)
263       .Case("0xd01", "tsv110")
264       .Default("generic");
265 
266   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
267     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
268     // values correspond to the "Part number" in the CP15/c0 register. The
269     // contents are specified in the various processor manuals.
270     return StringSwitch<const char *>(Part)
271         .Case("0x06f", "krait") // APQ8064
272         .Case("0x201", "kryo")
273         .Case("0x205", "kryo")
274         .Case("0x211", "kryo")
275         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
276         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
277         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
278         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
279         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
280         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
281         .Case("0xc00", "falkor")
282         .Case("0xc01", "saphira")
283         .Default("generic");
284   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
285     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
286     // any predictive pattern across variants and parts.
287     unsigned Variant = 0, Part = 0;
288 
289     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
290     // number, corresponding to the Variant bits in the CP15/C0 register.
291     for (auto I : Lines)
292       if (I.consume_front("CPU variant"))
293         I.ltrim("\t :").getAsInteger(0, Variant);
294 
295     // Look for the CPU part line, whose value is a 3 digit hexadecimal
296     // number, corresponding to the PartNum bits in the CP15/C0 register.
297     for (auto I : Lines)
298       if (I.consume_front("CPU part"))
299         I.ltrim("\t :").getAsInteger(0, Part);
300 
301     unsigned Exynos = (Variant << 12) | Part;
302     switch (Exynos) {
303     default:
304       // Default by falling through to Exynos M3.
305       [[fallthrough]];
306     case 0x1002:
307       return "exynos-m3";
308     case 0x1003:
309       return "exynos-m4";
310     }
311   }
312 
313   if (Implementer == "0x6d") { // Microsoft Corporation.
314     // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
315     return StringSwitch<const char *>(Part)
316         .Case("0xd49", "neoverse-n2")
317         .Default("generic");
318   }
319 
320   if (Implementer == "0xc0") { // Ampere Computing
321     return StringSwitch<const char *>(Part)
322         .Case("0xac3", "ampere1")
323         .Case("0xac4", "ampere1a")
324         .Case("0xac5", "ampere1b")
325         .Default("generic");
326   }
327 
328   return "generic";
329 }
330 
331 namespace {
getCPUNameFromS390Model(unsigned int Id,bool HaveVectorSupport)332 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
333   switch (Id) {
334     case 2064:  // z900 not supported by LLVM
335     case 2066:
336     case 2084:  // z990 not supported by LLVM
337     case 2086:
338     case 2094:  // z9-109 not supported by LLVM
339     case 2096:
340       return "generic";
341     case 2097:
342     case 2098:
343       return "z10";
344     case 2817:
345     case 2818:
346       return "z196";
347     case 2827:
348     case 2828:
349       return "zEC12";
350     case 2964:
351     case 2965:
352       return HaveVectorSupport? "z13" : "zEC12";
353     case 3906:
354     case 3907:
355       return HaveVectorSupport? "z14" : "zEC12";
356     case 8561:
357     case 8562:
358       return HaveVectorSupport? "z15" : "zEC12";
359     case 3931:
360     case 3932:
361     default:
362       return HaveVectorSupport? "z16" : "zEC12";
363   }
364 }
365 } // end anonymous namespace
366 
getHostCPUNameForS390x(StringRef ProcCpuinfoContent)367 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
368   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
369 
370   // The "processor 0:" line comes after a fair amount of other information,
371   // including a cache breakdown, but this should be plenty.
372   SmallVector<StringRef, 32> Lines;
373   ProcCpuinfoContent.split(Lines, "\n");
374 
375   // Look for the CPU features.
376   SmallVector<StringRef, 32> CPUFeatures;
377   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
378     if (Lines[I].starts_with("features")) {
379       size_t Pos = Lines[I].find(':');
380       if (Pos != StringRef::npos) {
381         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
382         break;
383       }
384     }
385 
386   // We need to check for the presence of vector support independently of
387   // the machine type, since we may only use the vector register set when
388   // supported by the kernel (and hypervisor).
389   bool HaveVectorSupport = false;
390   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
391     if (CPUFeatures[I] == "vx")
392       HaveVectorSupport = true;
393   }
394 
395   // Now check the processor machine type.
396   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
397     if (Lines[I].starts_with("processor ")) {
398       size_t Pos = Lines[I].find("machine = ");
399       if (Pos != StringRef::npos) {
400         Pos += sizeof("machine = ") - 1;
401         unsigned int Id;
402         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
403           return getCPUNameFromS390Model(Id, HaveVectorSupport);
404       }
405       break;
406     }
407   }
408 
409   return "generic";
410 }
411 
getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)412 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
413   // There are 24 lines in /proc/cpuinfo
414   SmallVector<StringRef> Lines;
415   ProcCpuinfoContent.split(Lines, "\n");
416 
417   // Look for uarch line to determine cpu name
418   StringRef UArch;
419   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
420     if (Lines[I].starts_with("uarch")) {
421       UArch = Lines[I].substr(5).ltrim("\t :");
422       break;
423     }
424   }
425 
426   return StringSwitch<const char *>(UArch)
427       .Case("sifive,u74-mc", "sifive-u74")
428       .Case("sifive,bullet0", "sifive-u74")
429       .Default("generic");
430 }
431 
getHostCPUNameForBPF()432 StringRef sys::detail::getHostCPUNameForBPF() {
433 #if !defined(__linux__) || !defined(__x86_64__)
434   return "generic";
435 #else
436   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
437       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
438     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
439       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
440       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
441       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
442       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
443       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
444       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
445       /* BPF_EXIT_INSN() */
446       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
447 
448   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
449       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
450     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
451       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
452       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
453       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
454       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
455       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
456       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
457       /* BPF_EXIT_INSN() */
458       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
459 
460   struct bpf_prog_load_attr {
461     uint32_t prog_type;
462     uint32_t insn_cnt;
463     uint64_t insns;
464     uint64_t license;
465     uint32_t log_level;
466     uint32_t log_size;
467     uint64_t log_buf;
468     uint32_t kern_version;
469     uint32_t prog_flags;
470   } attr = {};
471   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
472   attr.insn_cnt = 5;
473   attr.insns = (uint64_t)v3_insns;
474   attr.license = (uint64_t)"DUMMY";
475 
476   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
477                    sizeof(attr));
478   if (fd >= 0) {
479     close(fd);
480     return "v3";
481   }
482 
483   /* Clear the whole attr in case its content changed by syscall. */
484   memset(&attr, 0, sizeof(attr));
485   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
486   attr.insn_cnt = 5;
487   attr.insns = (uint64_t)v2_insns;
488   attr.license = (uint64_t)"DUMMY";
489   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
490   if (fd >= 0) {
491     close(fd);
492     return "v2";
493   }
494   return "v1";
495 #endif
496 }
497 
498 #if defined(__i386__) || defined(_M_IX86) || \
499     defined(__x86_64__) || defined(_M_X64)
500 
// The i386 probe below was taken from clang's cpuid.h (__get_cpuid_max). It
// exists because of bug reports about OpenSSL crashing on CPUs that lack
// CPUID, so on i386 the corresponding eflags bit is tested before CPUID is
// ever executed.
// The cpuid.h header itself was dropped because of PR30384.
// Neither cpuid.h nor __get_cpuid_max is used in llvm, clang, openmp or
// test-suite, but external projects such as libstdcxx do use them.
//
// Returns false only when the i386 probe runs and finds that the eflags bit
// cannot be toggled; every other configuration reports true.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Flip bit 0x00200000 of eflags and read the register back: the result is
  // 1 when the value read back differs from the original, 0 otherwise.
  int HasCpuId;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
535 
/// getX86CpuIDAndInfo - Run cpuid for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX through the four output pointers.
///
/// \returns false once cpuid has been executed; true when this compiler and
/// target combination offers no way to run it, in which case the outputs are
/// not written.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same register shuffle as the 64-bit variant, on the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
573 
574 namespace llvm {
575 namespace sys {
576 namespace detail {
577 namespace x86 {
578 
getVendorSignature(unsigned * MaxLeaf)579 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
580   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
581   if (MaxLeaf == nullptr)
582     MaxLeaf = &EAX;
583   else
584     *MaxLeaf = 0;
585 
586   if (!isCpuIdSupported())
587     return VendorSignatures::UNKNOWN;
588 
589   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
590     return VendorSignatures::UNKNOWN;
591 
592   // "Genu ineI ntel"
593   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
594     return VendorSignatures::GENUINE_INTEL;
595 
596   // "Auth enti cAMD"
597   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
598     return VendorSignatures::AUTHENTIC_AMD;
599 
600   return VendorSignatures::UNKNOWN;
601 }
602 
603 } // namespace x86
604 } // namespace detail
605 } // namespace sys
606 } // namespace llvm
607 
608 using namespace llvm::sys::detail::x86;
609 
/// getX86CpuIDAndInfoEx - Run cpuid for leaf \p value and sub-leaf
/// \p subleaf and store the resulting EAX, EBX, ECX and EDX through the four
/// output pointers.
///
/// \returns false once cpuid has been executed; true when this compiler and
/// target combination offers no way to run it, in which case the outputs are
/// not written.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same register shuffle as the 64-bit variant, on the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
648 
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// The low half of the register is stored through rEAX and the high half
// through rEDX. Returns false when the register was read, true when no way
// of reading it is available in this build (outputs are then not written).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic: older assemblers
  // do not know the instruction, and there is no easy way to conditionally
  // compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
666 
/// Decodes the family and model numbers from a cpuid EAX value.
///
/// \param EAX the register value to decode.
/// \param Family receives bits 8-11, plus bits 20-27 when bits 8-11 equal
/// 0xF.
/// \param Model receives bits 4-7, plus bits 16-19 shifted left by four when
/// bits 8-11 equal 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  // The extended family ID only counts when the base family ID is F.
  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;
  // The extended model ID only counts when the base family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
679 
680 static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)681 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
682                                 const unsigned *Features,
683                                 unsigned *Type, unsigned *Subtype) {
684   auto testFeature = [&](unsigned F) {
685     return (Features[F / 32] & (1U << (F % 32))) != 0;
686   };
687 
688   StringRef CPU;
689 
690   switch (Family) {
691   case 3:
692     CPU = "i386";
693     break;
694   case 4:
695     CPU = "i486";
696     break;
697   case 5:
698     if (testFeature(X86::FEATURE_MMX)) {
699       CPU = "pentium-mmx";
700       break;
701     }
702     CPU = "pentium";
703     break;
704   case 6:
705     switch (Model) {
706     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
707                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
708                // mobile processor, Intel Core 2 Extreme processor, Intel
709                // Pentium Dual-Core processor, Intel Xeon processor, model
710                // 0Fh. All processors are manufactured using the 65 nm process.
711     case 0x16: // Intel Celeron processor model 16h. All processors are
712                // manufactured using the 65 nm process
713       CPU = "core2";
714       *Type = X86::INTEL_CORE2;
715       break;
716     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
717                // 17h. All processors are manufactured using the 45 nm process.
718                //
719                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
720     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
721                // the 45 nm process.
722       CPU = "penryn";
723       *Type = X86::INTEL_CORE2;
724       break;
725     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
726                // processors are manufactured using the 45 nm process.
727     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
728                // As found in a Summer 2010 model iMac.
729     case 0x1f:
730     case 0x2e:              // Nehalem EX
731       CPU = "nehalem";
732       *Type = X86::INTEL_COREI7;
733       *Subtype = X86::INTEL_COREI7_NEHALEM;
734       break;
735     case 0x25: // Intel Core i7, laptop version.
736     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
737                // processors are manufactured using the 32 nm process.
738     case 0x2f: // Westmere EX
739       CPU = "westmere";
740       *Type = X86::INTEL_COREI7;
741       *Subtype = X86::INTEL_COREI7_WESTMERE;
742       break;
743     case 0x2a: // Intel Core i7 processor. All processors are manufactured
744                // using the 32 nm process.
745     case 0x2d:
746       CPU = "sandybridge";
747       *Type = X86::INTEL_COREI7;
748       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
749       break;
750     case 0x3a:
751     case 0x3e:              // Ivy Bridge EP
752       CPU = "ivybridge";
753       *Type = X86::INTEL_COREI7;
754       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
755       break;
756 
757     // Haswell:
758     case 0x3c:
759     case 0x3f:
760     case 0x45:
761     case 0x46:
762       CPU = "haswell";
763       *Type = X86::INTEL_COREI7;
764       *Subtype = X86::INTEL_COREI7_HASWELL;
765       break;
766 
767     // Broadwell:
768     case 0x3d:
769     case 0x47:
770     case 0x4f:
771     case 0x56:
772       CPU = "broadwell";
773       *Type = X86::INTEL_COREI7;
774       *Subtype = X86::INTEL_COREI7_BROADWELL;
775       break;
776 
777     // Skylake:
778     case 0x4e:              // Skylake mobile
779     case 0x5e:              // Skylake desktop
780     case 0x8e:              // Kaby Lake mobile
781     case 0x9e:              // Kaby Lake desktop
782     case 0xa5:              // Comet Lake-H/S
783     case 0xa6:              // Comet Lake-U
784       CPU = "skylake";
785       *Type = X86::INTEL_COREI7;
786       *Subtype = X86::INTEL_COREI7_SKYLAKE;
787       break;
788 
789     // Rocketlake:
790     case 0xa7:
791       CPU = "rocketlake";
792       *Type = X86::INTEL_COREI7;
793       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
794       break;
795 
796     // Skylake Xeon:
797     case 0x55:
798       *Type = X86::INTEL_COREI7;
799       if (testFeature(X86::FEATURE_AVX512BF16)) {
800         CPU = "cooperlake";
801         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
802       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
803         CPU = "cascadelake";
804         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
805       } else {
806         CPU = "skylake-avx512";
807         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
808       }
809       break;
810 
811     // Cannonlake:
812     case 0x66:
813       CPU = "cannonlake";
814       *Type = X86::INTEL_COREI7;
815       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
816       break;
817 
818     // Icelake:
819     case 0x7d:
820     case 0x7e:
821       CPU = "icelake-client";
822       *Type = X86::INTEL_COREI7;
823       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
824       break;
825 
826     // Tigerlake:
827     case 0x8c:
828     case 0x8d:
829       CPU = "tigerlake";
830       *Type = X86::INTEL_COREI7;
831       *Subtype = X86::INTEL_COREI7_TIGERLAKE;
832       break;
833 
834     // Alderlake:
835     case 0x97:
836     case 0x9a:
837     // Gracemont
838     case 0xbe:
839     // Raptorlake:
840     case 0xb7:
841     case 0xba:
842     case 0xbf:
843     // Meteorlake:
844     case 0xaa:
845     case 0xac:
846       CPU = "alderlake";
847       *Type = X86::INTEL_COREI7;
848       *Subtype = X86::INTEL_COREI7_ALDERLAKE;
849       break;
850 
851     // Arrowlake:
852     case 0xc5:
853       CPU = "arrowlake";
854       *Type = X86::INTEL_COREI7;
855       *Subtype = X86::INTEL_COREI7_ARROWLAKE;
856       break;
857 
858     // Arrowlake S:
859     case 0xc6:
860     // Lunarlake:
861     case 0xbd:
862       CPU = "arrowlake-s";
863       *Type = X86::INTEL_COREI7;
864       *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
865       break;
866 
867     // Pantherlake:
868     case 0xcc:
869       CPU = "pantherlake";
870       *Type = X86::INTEL_COREI7;
871       *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
872       break;
873 
874     // Graniterapids:
875     case 0xad:
876       CPU = "graniterapids";
877       *Type = X86::INTEL_COREI7;
878       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
879       break;
880 
881     // Granite Rapids D:
882     case 0xae:
883       CPU = "graniterapids-d";
884       *Type = X86::INTEL_COREI7;
885       *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
886       break;
887 
888     // Icelake Xeon:
889     case 0x6a:
890     case 0x6c:
891       CPU = "icelake-server";
892       *Type = X86::INTEL_COREI7;
893       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
894       break;
895 
896     // Emerald Rapids:
897     case 0xcf:
898     // Sapphire Rapids:
899     case 0x8f:
900       CPU = "sapphirerapids";
901       *Type = X86::INTEL_COREI7;
902       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
903       break;
904 
905     case 0x1c: // Most 45 nm Intel Atom processors
906     case 0x26: // 45 nm Atom Lincroft
907     case 0x27: // 32 nm Atom Medfield
908     case 0x35: // 32 nm Atom Midview
909     case 0x36: // 32 nm Atom Midview
910       CPU = "bonnell";
911       *Type = X86::INTEL_BONNELL;
912       break;
913 
914     // Atom Silvermont codes from the Intel software optimization guide.
915     case 0x37:
916     case 0x4a:
917     case 0x4d:
918     case 0x5a:
919     case 0x5d:
920     case 0x4c: // really airmont
921       CPU = "silvermont";
922       *Type = X86::INTEL_SILVERMONT;
923       break;
924     // Goldmont:
925     case 0x5c: // Apollo Lake
926     case 0x5f: // Denverton
927       CPU = "goldmont";
928       *Type = X86::INTEL_GOLDMONT;
929       break;
930     case 0x7a:
931       CPU = "goldmont-plus";
932       *Type = X86::INTEL_GOLDMONT_PLUS;
933       break;
934     case 0x86:
935     case 0x8a: // Lakefield
936     case 0x96: // Elkhart Lake
937     case 0x9c: // Jasper Lake
938       CPU = "tremont";
939       *Type = X86::INTEL_TREMONT;
940       break;
941 
942     // Sierraforest:
943     case 0xaf:
944       CPU = "sierraforest";
945       *Type = X86::INTEL_SIERRAFOREST;
946       break;
947 
948     // Grandridge:
949     case 0xb6:
950       CPU = "grandridge";
951       *Type = X86::INTEL_GRANDRIDGE;
952       break;
953 
954     // Clearwaterforest:
955     case 0xdd:
956       CPU = "clearwaterforest";
957       *Type = X86::INTEL_CLEARWATERFOREST;
958       break;
959 
960     // Xeon Phi (Knights Landing + Knights Mill):
961     case 0x57:
962       CPU = "knl";
963       *Type = X86::INTEL_KNL;
964       break;
965     case 0x85:
966       CPU = "knm";
967       *Type = X86::INTEL_KNM;
968       break;
969 
970     default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
972       // They're used above to keep the code in sync with compiler-rt.
973       // TODO detect tigerlake host from model
974       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
975         CPU = "tigerlake";
976       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
977         CPU = "icelake-client";
978       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
979         CPU = "cannonlake";
980       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
981         CPU = "cooperlake";
982       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
983         CPU = "cascadelake";
984       } else if (testFeature(X86::FEATURE_AVX512VL)) {
985         CPU = "skylake-avx512";
986       } else if (testFeature(X86::FEATURE_AVX512ER)) {
987         CPU = "knl";
988       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
989         if (testFeature(X86::FEATURE_SHA))
990           CPU = "goldmont";
991         else
992           CPU = "skylake";
993       } else if (testFeature(X86::FEATURE_ADX)) {
994         CPU = "broadwell";
995       } else if (testFeature(X86::FEATURE_AVX2)) {
996         CPU = "haswell";
997       } else if (testFeature(X86::FEATURE_AVX)) {
998         CPU = "sandybridge";
999       } else if (testFeature(X86::FEATURE_SSE4_2)) {
1000         if (testFeature(X86::FEATURE_MOVBE))
1001           CPU = "silvermont";
1002         else
1003           CPU = "nehalem";
1004       } else if (testFeature(X86::FEATURE_SSE4_1)) {
1005         CPU = "penryn";
1006       } else if (testFeature(X86::FEATURE_SSSE3)) {
1007         if (testFeature(X86::FEATURE_MOVBE))
1008           CPU = "bonnell";
1009         else
1010           CPU = "core2";
1011       } else if (testFeature(X86::FEATURE_64BIT)) {
1012         CPU = "core2";
1013       } else if (testFeature(X86::FEATURE_SSE3)) {
1014         CPU = "yonah";
1015       } else if (testFeature(X86::FEATURE_SSE2)) {
1016         CPU = "pentium-m";
1017       } else if (testFeature(X86::FEATURE_SSE)) {
1018         CPU = "pentium3";
1019       } else if (testFeature(X86::FEATURE_MMX)) {
1020         CPU = "pentium2";
1021       } else {
1022         CPU = "pentiumpro";
1023       }
1024       break;
1025     }
1026     break;
1027   case 15: {
1028     if (testFeature(X86::FEATURE_64BIT)) {
1029       CPU = "nocona";
1030       break;
1031     }
1032     if (testFeature(X86::FEATURE_SSE3)) {
1033       CPU = "prescott";
1034       break;
1035     }
1036     CPU = "pentium4";
1037     break;
1038   }
1039   default:
1040     break; // Unknown.
1041   }
1042 
1043   return CPU;
1044 }
1045 
1046 static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)1047 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1048                               const unsigned *Features,
1049                               unsigned *Type, unsigned *Subtype) {
1050   auto testFeature = [&](unsigned F) {
1051     return (Features[F / 32] & (1U << (F % 32))) != 0;
1052   };
1053 
1054   StringRef CPU;
1055 
1056   switch (Family) {
1057   case 4:
1058     CPU = "i486";
1059     break;
1060   case 5:
1061     CPU = "pentium";
1062     switch (Model) {
1063     case 6:
1064     case 7:
1065       CPU = "k6";
1066       break;
1067     case 8:
1068       CPU = "k6-2";
1069       break;
1070     case 9:
1071     case 13:
1072       CPU = "k6-3";
1073       break;
1074     case 10:
1075       CPU = "geode";
1076       break;
1077     }
1078     break;
1079   case 6:
1080     if (testFeature(X86::FEATURE_SSE)) {
1081       CPU = "athlon-xp";
1082       break;
1083     }
1084     CPU = "athlon";
1085     break;
1086   case 15:
1087     if (testFeature(X86::FEATURE_SSE3)) {
1088       CPU = "k8-sse3";
1089       break;
1090     }
1091     CPU = "k8";
1092     break;
1093   case 16:
1094     CPU = "amdfam10";
1095     *Type = X86::AMDFAM10H; // "amdfam10"
1096     switch (Model) {
1097     case 2:
1098       *Subtype = X86::AMDFAM10H_BARCELONA;
1099       break;
1100     case 4:
1101       *Subtype = X86::AMDFAM10H_SHANGHAI;
1102       break;
1103     case 8:
1104       *Subtype = X86::AMDFAM10H_ISTANBUL;
1105       break;
1106     }
1107     break;
1108   case 20:
1109     CPU = "btver1";
1110     *Type = X86::AMD_BTVER1;
1111     break;
1112   case 21:
1113     CPU = "bdver1";
1114     *Type = X86::AMDFAM15H;
1115     if (Model >= 0x60 && Model <= 0x7f) {
1116       CPU = "bdver4";
1117       *Subtype = X86::AMDFAM15H_BDVER4;
1118       break; // 60h-7Fh: Excavator
1119     }
1120     if (Model >= 0x30 && Model <= 0x3f) {
1121       CPU = "bdver3";
1122       *Subtype = X86::AMDFAM15H_BDVER3;
1123       break; // 30h-3Fh: Steamroller
1124     }
1125     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1126       CPU = "bdver2";
1127       *Subtype = X86::AMDFAM15H_BDVER2;
1128       break; // 02h, 10h-1Fh: Piledriver
1129     }
1130     if (Model <= 0x0f) {
1131       *Subtype = X86::AMDFAM15H_BDVER1;
1132       break; // 00h-0Fh: Bulldozer
1133     }
1134     break;
1135   case 22:
1136     CPU = "btver2";
1137     *Type = X86::AMD_BTVER2;
1138     break;
1139   case 23:
1140     CPU = "znver1";
1141     *Type = X86::AMDFAM17H;
1142     if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
1143         (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
1144         (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
1145         (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
1146         (Model >= 0xa0 && Model <= 0xaf)) {
1147       // Family 17h Models 30h-3Fh (Starship) Zen 2
1148       // Family 17h Models 47h (Cardinal) Zen 2
1149       // Family 17h Models 60h-67h (Renoir) Zen 2
1150       // Family 17h Models 68h-6Fh (Lucienne) Zen 2
1151       // Family 17h Models 70h-7Fh (Matisse) Zen 2
1152       // Family 17h Models 84h-87h (ProjectX) Zen 2
1153       // Family 17h Models 90h-97h (VanGogh) Zen 2
1154       // Family 17h Models 98h-9Fh (Mero) Zen 2
1155       // Family 17h Models A0h-AFh (Mendocino) Zen 2
1156       CPU = "znver2";
1157       *Subtype = X86::AMDFAM17H_ZNVER2;
1158       break;
1159     }
1160     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
1161       // Family 17h Models 10h-1Fh (Raven1) Zen
1162       // Family 17h Models 10h-1Fh (Picasso) Zen+
1163       // Family 17h Models 20h-2Fh (Raven2 x86) Zen
1164       *Subtype = X86::AMDFAM17H_ZNVER1;
1165       break;
1166     }
1167     break;
1168   case 25:
1169     CPU = "znver3";
1170     *Type = X86::AMDFAM19H;
1171     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
1172         (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
1173         (Model >= 0x50 && Model <= 0x5f)) {
1174       // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
1175       // Family 19h Models 20h-2Fh (Vermeer) Zen 3
1176       // Family 19h Models 30h-3Fh (Badami) Zen 3
1177       // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
1178       // Family 19h Models 50h-5Fh (Cezanne) Zen 3
1179       *Subtype = X86::AMDFAM19H_ZNVER3;
1180       break;
1181     }
1182     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
1183         (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
1184         (Model >= 0xa0 && Model <= 0xaf)) {
1185       // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
1186       // Family 19h Models 60h-6Fh (Raphael) Zen 4
1187       // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
1188       // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
1189       // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
1190       CPU = "znver4";
1191       *Subtype = X86::AMDFAM19H_ZNVER4;
1192       break; //  "znver4"
1193     }
1194     break;
1195   default:
1196     break; // Unknown AMD CPU.
1197   }
1198 
1199   return CPU;
1200 }
1201 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)1202 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1203                                  unsigned *Features) {
1204   unsigned EAX, EBX;
1205 
1206   auto setFeature = [&](unsigned F) {
1207     Features[F / 32] |= 1U << (F % 32);
1208   };
1209 
1210   if ((EDX >> 15) & 1)
1211     setFeature(X86::FEATURE_CMOV);
1212   if ((EDX >> 23) & 1)
1213     setFeature(X86::FEATURE_MMX);
1214   if ((EDX >> 25) & 1)
1215     setFeature(X86::FEATURE_SSE);
1216   if ((EDX >> 26) & 1)
1217     setFeature(X86::FEATURE_SSE2);
1218 
1219   if ((ECX >> 0) & 1)
1220     setFeature(X86::FEATURE_SSE3);
1221   if ((ECX >> 1) & 1)
1222     setFeature(X86::FEATURE_PCLMUL);
1223   if ((ECX >> 9) & 1)
1224     setFeature(X86::FEATURE_SSSE3);
1225   if ((ECX >> 12) & 1)
1226     setFeature(X86::FEATURE_FMA);
1227   if ((ECX >> 19) & 1)
1228     setFeature(X86::FEATURE_SSE4_1);
1229   if ((ECX >> 20) & 1) {
1230     setFeature(X86::FEATURE_SSE4_2);
1231     setFeature(X86::FEATURE_CRC32);
1232   }
1233   if ((ECX >> 23) & 1)
1234     setFeature(X86::FEATURE_POPCNT);
1235   if ((ECX >> 25) & 1)
1236     setFeature(X86::FEATURE_AES);
1237 
1238   if ((ECX >> 22) & 1)
1239     setFeature(X86::FEATURE_MOVBE);
1240 
1241   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1242   // indicates that the AVX registers will be saved and restored on context
1243   // switch, then we have full AVX support.
1244   const unsigned AVXBits = (1 << 27) | (1 << 28);
1245   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1246                 ((EAX & 0x6) == 0x6);
1247 #if defined(__APPLE__)
1248   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1249   // save the AVX512 context if we use AVX512 instructions, even the bit is not
1250   // set right now.
1251   bool HasAVX512Save = true;
1252 #else
1253   // AVX512 requires additional context to be saved by the OS.
1254   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1255 #endif
1256 
1257   if (HasAVX)
1258     setFeature(X86::FEATURE_AVX);
1259 
1260   bool HasLeaf7 =
1261       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1262 
1263   if (HasLeaf7 && ((EBX >> 3) & 1))
1264     setFeature(X86::FEATURE_BMI);
1265   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1266     setFeature(X86::FEATURE_AVX2);
1267   if (HasLeaf7 && ((EBX >> 8) & 1))
1268     setFeature(X86::FEATURE_BMI2);
1269   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
1270     setFeature(X86::FEATURE_AVX512F);
1271     setFeature(X86::FEATURE_EVEX512);
1272   }
1273   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1274     setFeature(X86::FEATURE_AVX512DQ);
1275   if (HasLeaf7 && ((EBX >> 19) & 1))
1276     setFeature(X86::FEATURE_ADX);
1277   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1278     setFeature(X86::FEATURE_AVX512IFMA);
1279   if (HasLeaf7 && ((EBX >> 23) & 1))
1280     setFeature(X86::FEATURE_CLFLUSHOPT);
1281   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1282     setFeature(X86::FEATURE_AVX512PF);
1283   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1284     setFeature(X86::FEATURE_AVX512ER);
1285   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1286     setFeature(X86::FEATURE_AVX512CD);
1287   if (HasLeaf7 && ((EBX >> 29) & 1))
1288     setFeature(X86::FEATURE_SHA);
1289   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1290     setFeature(X86::FEATURE_AVX512BW);
1291   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1292     setFeature(X86::FEATURE_AVX512VL);
1293 
1294   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1295     setFeature(X86::FEATURE_AVX512VBMI);
1296   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1297     setFeature(X86::FEATURE_AVX512VBMI2);
1298   if (HasLeaf7 && ((ECX >> 8) & 1))
1299     setFeature(X86::FEATURE_GFNI);
1300   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1301     setFeature(X86::FEATURE_VPCLMULQDQ);
1302   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1303     setFeature(X86::FEATURE_AVX512VNNI);
1304   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1305     setFeature(X86::FEATURE_AVX512BITALG);
1306   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1307     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1308 
1309   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1310     setFeature(X86::FEATURE_AVX5124VNNIW);
1311   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1312     setFeature(X86::FEATURE_AVX5124FMAPS);
1313   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1314     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1315 
1316   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1317   // return all 0s for invalid subleaves so check the limit.
1318   bool HasLeaf7Subleaf1 =
1319       HasLeaf7 && EAX >= 1 &&
1320       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1321   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1322     setFeature(X86::FEATURE_AVX512BF16);
1323 
1324   unsigned MaxExtLevel;
1325   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1326 
1327   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1328                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1329   if (HasExtLeaf1 && ((ECX >> 6) & 1))
1330     setFeature(X86::FEATURE_SSE4_A);
1331   if (HasExtLeaf1 && ((ECX >> 11) & 1))
1332     setFeature(X86::FEATURE_XOP);
1333   if (HasExtLeaf1 && ((ECX >> 16) & 1))
1334     setFeature(X86::FEATURE_FMA4);
1335 
1336   if (HasExtLeaf1 && ((EDX >> 29) & 1))
1337     setFeature(X86::FEATURE_64BIT);
1338 }
1339 
getHostCPUName()1340 StringRef sys::getHostCPUName() {
1341   unsigned MaxLeaf = 0;
1342   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1343   if (Vendor == VendorSignatures::UNKNOWN)
1344     return "generic";
1345 
1346   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1347   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1348 
1349   unsigned Family = 0, Model = 0;
1350   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1351   detectX86FamilyModel(EAX, &Family, &Model);
1352   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1353 
1354   // These aren't consumed in this file, but we try to keep some source code the
1355   // same or similar to compiler-rt.
1356   unsigned Type = 0;
1357   unsigned Subtype = 0;
1358 
1359   StringRef CPU;
1360 
1361   if (Vendor == VendorSignatures::GENUINE_INTEL) {
1362     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1363                                           &Subtype);
1364   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1365     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1366                                         &Subtype);
1367   }
1368 
1369   if (!CPU.empty())
1370     return CPU;
1371 
1372   return "generic";
1373 }
1374 
1375 #elif defined(__APPLE__) && defined(__powerpc__)
getHostCPUName()1376 StringRef sys::getHostCPUName() {
1377   host_basic_info_data_t hostInfo;
1378   mach_msg_type_number_t infoCount;
1379 
1380   infoCount = HOST_BASIC_INFO_COUNT;
1381   mach_port_t hostPort = mach_host_self();
1382   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1383             &infoCount);
1384   mach_port_deallocate(mach_task_self(), hostPort);
1385 
1386   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1387     return "generic";
1388 
1389   switch (hostInfo.cpu_subtype) {
1390   case CPU_SUBTYPE_POWERPC_601:
1391     return "601";
1392   case CPU_SUBTYPE_POWERPC_602:
1393     return "602";
1394   case CPU_SUBTYPE_POWERPC_603:
1395     return "603";
1396   case CPU_SUBTYPE_POWERPC_603e:
1397     return "603e";
1398   case CPU_SUBTYPE_POWERPC_603ev:
1399     return "603ev";
1400   case CPU_SUBTYPE_POWERPC_604:
1401     return "604";
1402   case CPU_SUBTYPE_POWERPC_604e:
1403     return "604e";
1404   case CPU_SUBTYPE_POWERPC_620:
1405     return "620";
1406   case CPU_SUBTYPE_POWERPC_750:
1407     return "750";
1408   case CPU_SUBTYPE_POWERPC_7400:
1409     return "7400";
1410   case CPU_SUBTYPE_POWERPC_7450:
1411     return "7450";
1412   case CPU_SUBTYPE_POWERPC_970:
1413     return "970";
1414   default:;
1415   }
1416 
1417   return "generic";
1418 }
1419 #elif defined(__linux__) && defined(__powerpc__)
getHostCPUName()1420 StringRef sys::getHostCPUName() {
1421   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1422   StringRef Content = P ? P->getBuffer() : "";
1423   return detail::getHostCPUNameForPowerPC(Content);
1424 }
1425 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUName()1426 StringRef sys::getHostCPUName() {
1427   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1428   StringRef Content = P ? P->getBuffer() : "";
1429   return detail::getHostCPUNameForARM(Content);
1430 }
1431 #elif defined(__linux__) && defined(__s390x__)
getHostCPUName()1432 StringRef sys::getHostCPUName() {
1433   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1434   StringRef Content = P ? P->getBuffer() : "";
1435   return detail::getHostCPUNameForS390x(Content);
1436 }
1437 #elif defined(__MVS__)
getHostCPUName()1438 StringRef sys::getHostCPUName() {
1439   // Get pointer to Communications Vector Table (CVT).
1440   // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1441   // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1442   int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1443   // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1444   // of address.
1445   int ReadValue = *StartToCVTOffset;
1446   // Explicitly clear the high order bit.
1447   ReadValue = (ReadValue & 0x7FFFFFFF);
1448   char *CVT = reinterpret_cast<char *>(ReadValue);
1449   // The model number is located in the CVT prefix at offset -6 and stored as
1450   // signless packed decimal.
1451   uint16_t Id = *(uint16_t *)&CVT[-6];
1452   // Convert number to integer.
1453   Id = decodePackedBCD<uint16_t>(Id, false);
1454   // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1455   // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1456   // extension can only be used if bit CVTVEF is on.
1457   bool HaveVectorSupport = CVT[244] & 0x80;
1458   return getCPUNameFromS390Model(Id, HaveVectorSupport);
1459 }
1460 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1461 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1462 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1463 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1464 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1465 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1466 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1467 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1468 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1469 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1470 
getHostCPUName()1471 StringRef sys::getHostCPUName() {
1472   uint32_t Family;
1473   size_t Length = sizeof(Family);
1474   sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1475 
1476   switch (Family) {
1477   case CPUFAMILY_ARM_SWIFT:
1478     return "swift";
1479   case CPUFAMILY_ARM_CYCLONE:
1480     return "apple-a7";
1481   case CPUFAMILY_ARM_TYPHOON:
1482     return "apple-a8";
1483   case CPUFAMILY_ARM_TWISTER:
1484     return "apple-a9";
1485   case CPUFAMILY_ARM_HURRICANE:
1486     return "apple-a10";
1487   case CPUFAMILY_ARM_MONSOON_MISTRAL:
1488     return "apple-a11";
1489   case CPUFAMILY_ARM_VORTEX_TEMPEST:
1490     return "apple-a12";
1491   case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1492     return "apple-a13";
1493   case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1494     return "apple-m1";
1495   default:
1496     // Default to the newest CPU we know about.
1497     return "apple-m1";
1498   }
1499 }
1500 #elif defined(_AIX)
getHostCPUName()1501 StringRef sys::getHostCPUName() {
1502   switch (_system_configuration.implementation) {
1503   case POWER_4:
1504     if (_system_configuration.version == PV_4_3)
1505       return "970";
1506     return "pwr4";
1507   case POWER_5:
1508     if (_system_configuration.version == PV_5)
1509       return "pwr5";
1510     return "pwr5x";
1511   case POWER_6:
1512     if (_system_configuration.version == PV_6_Compat)
1513       return "pwr6";
1514     return "pwr6x";
1515   case POWER_7:
1516     return "pwr7";
1517   case POWER_8:
1518     return "pwr8";
1519   case POWER_9:
1520     return "pwr9";
1521 // TODO: simplify this once the macro is available in all OS levels.
1522 #ifdef POWER_10
1523   case POWER_10:
1524 #else
1525   case 0x40000:
1526 #endif
1527     return "pwr10";
1528   default:
1529     return "generic";
1530   }
1531 }
1532 #elif defined(__loongarch__)
getHostCPUName()1533 StringRef sys::getHostCPUName() {
1534   // Use processor id to detect cpu name.
1535   uint32_t processor_id;
1536   __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1537   // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h.
1538   switch (processor_id & 0xf000) {
1539   case 0xc000: // Loongson 64bit, 4-issue
1540     return "la464";
1541   // TODO: Others.
1542   default:
1543     break;
1544   }
1545   return "generic";
1546 }
1547 #elif defined(__riscv)
getHostCPUName()1548 StringRef sys::getHostCPUName() {
1549 #if defined(__linux__)
1550   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1551   StringRef Content = P ? P->getBuffer() : "";
1552   return detail::getHostCPUNameForRISCV(Content);
1553 #else
1554 #if __riscv_xlen == 64
1555   return "generic-rv64";
1556 #elif __riscv_xlen == 32
1557   return "generic-rv32";
1558 #else
1559 #error "Unhandled value of __riscv_xlen"
1560 #endif
1561 #endif
1562 }
1563 #elif defined(__sparc__)
1564 #if defined(__linux__)
getHostCPUNameForSPARC(StringRef ProcCpuinfoContent)1565 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1566   SmallVector<StringRef> Lines;
1567   ProcCpuinfoContent.split(Lines, "\n");
1568 
1569   // Look for cpu line to determine cpu name
1570   StringRef Cpu;
1571   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1572     if (Lines[I].starts_with("cpu")) {
1573       Cpu = Lines[I].substr(5).ltrim("\t :");
1574       break;
1575     }
1576   }
1577 
1578   return StringSwitch<const char *>(Cpu)
1579       .StartsWith("SuperSparc", "supersparc")
1580       .StartsWith("HyperSparc", "hypersparc")
1581       .StartsWith("SpitFire", "ultrasparc")
1582       .StartsWith("BlackBird", "ultrasparc")
1583       .StartsWith("Sabre", " ultrasparc")
1584       .StartsWith("Hummingbird", "ultrasparc")
1585       .StartsWith("Cheetah", "ultrasparc3")
1586       .StartsWith("Jalapeno", "ultrasparc3")
1587       .StartsWith("Jaguar", "ultrasparc3")
1588       .StartsWith("Panther", "ultrasparc3")
1589       .StartsWith("Serrano", "ultrasparc3")
1590       .StartsWith("UltraSparc T1", "niagara")
1591       .StartsWith("UltraSparc T2", "niagara2")
1592       .StartsWith("UltraSparc T3", "niagara3")
1593       .StartsWith("UltraSparc T4", "niagara4")
1594       .StartsWith("UltraSparc T5", "niagara4")
1595       .StartsWith("LEON", "leon3")
1596       // niagara7/m8 not supported by LLVM yet.
1597       .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1598       .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1599       .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1600       .Default("generic");
1601 }
1602 #endif
1603 
getHostCPUName()1604 StringRef sys::getHostCPUName() {
1605 #if defined(__linux__)
1606   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1607   StringRef Content = P ? P->getBuffer() : "";
1608   return detail::getHostCPUNameForSPARC(Content);
1609 #elif defined(__sun__) && defined(__svr4__)
1610   char *buf = NULL;
1611   kstat_ctl_t *kc;
1612   kstat_t *ksp;
1613   kstat_named_t *brand = NULL;
1614 
1615   kc = kstat_open();
1616   if (kc != NULL) {
1617     ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1618     if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1619         ksp->ks_type == KSTAT_TYPE_NAMED)
1620       brand =
1621           (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1622     if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1623       buf = KSTAT_NAMED_STR_PTR(brand);
1624   }
1625   kstat_close(kc);
1626 
1627   return StringSwitch<const char *>(buf)
1628       .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1629       .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1630       .Case("TMS390Z55",
1631             "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1632       .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1633       .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1634       .Case("RT623", "hypersparc")   // Ross hyperSPARC
1635       .Case("RT625", "hypersparc")
1636       .Case("RT626", "hypersparc")
1637       .Case("UltraSPARC-I", "ultrasparc")
1638       .Case("UltraSPARC-II", "ultrasparc")
1639       .Case("UltraSPARC-IIe", "ultrasparc")
1640       .Case("UltraSPARC-IIi", "ultrasparc")
1641       .Case("SPARC64-III", "ultrasparc")
1642       .Case("SPARC64-IV", "ultrasparc")
1643       .Case("UltraSPARC-III", "ultrasparc3")
1644       .Case("UltraSPARC-III+", "ultrasparc3")
1645       .Case("UltraSPARC-IIIi", "ultrasparc3")
1646       .Case("UltraSPARC-IIIi+", "ultrasparc3")
1647       .Case("UltraSPARC-IV", "ultrasparc3")
1648       .Case("UltraSPARC-IV+", "ultrasparc3")
1649       .Case("SPARC64-V", "ultrasparc3")
1650       .Case("SPARC64-VI", "ultrasparc3")
1651       .Case("SPARC64-VII", "ultrasparc3")
1652       .Case("UltraSPARC-T1", "niagara")
1653       .Case("UltraSPARC-T2", "niagara2")
1654       .Case("UltraSPARC-T2", "niagara2")
1655       .Case("UltraSPARC-T2+", "niagara2")
1656       .Case("SPARC-T3", "niagara3")
1657       .Case("SPARC-T4", "niagara4")
1658       .Case("SPARC-T5", "niagara4")
1659       // niagara7/m8 not supported by LLVM yet.
1660       .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1661       .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1662       .Case("SPARC-M8", "niagara4" /* "m8" */)
1663       .Default("generic");
1664 #else
1665   return "generic";
1666 #endif
1667 }
1668 #else
getHostCPUName()1669 StringRef sys::getHostCPUName() { return "generic"; }
1670 namespace llvm {
1671 namespace sys {
1672 namespace detail {
1673 namespace x86 {
1674 
getVendorSignature(unsigned * MaxLeaf)1675 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1676   return VendorSignatures::UNKNOWN;
1677 }
1678 
1679 } // namespace x86
1680 } // namespace detail
1681 } // namespace sys
1682 } // namespace llvm
1683 #endif
1684 
1685 #if defined(__i386__) || defined(_M_IX86) || \
1686     defined(__x86_64__) || defined(_M_X64)
getHostCPUFeatures(StringMap<bool> & Features)1687 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1688   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1689   unsigned MaxLevel;
1690 
1691   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1692     return false;
1693 
1694   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1695 
1696   Features["cx8"]    = (EDX >>  8) & 1;
1697   Features["cmov"]   = (EDX >> 15) & 1;
1698   Features["mmx"]    = (EDX >> 23) & 1;
1699   Features["fxsr"]   = (EDX >> 24) & 1;
1700   Features["sse"]    = (EDX >> 25) & 1;
1701   Features["sse2"]   = (EDX >> 26) & 1;
1702 
1703   Features["sse3"]   = (ECX >>  0) & 1;
1704   Features["pclmul"] = (ECX >>  1) & 1;
1705   Features["ssse3"]  = (ECX >>  9) & 1;
1706   Features["cx16"]   = (ECX >> 13) & 1;
1707   Features["sse4.1"] = (ECX >> 19) & 1;
1708   Features["sse4.2"] = (ECX >> 20) & 1;
1709   Features["crc32"]  = Features["sse4.2"];
1710   Features["movbe"]  = (ECX >> 22) & 1;
1711   Features["popcnt"] = (ECX >> 23) & 1;
1712   Features["aes"]    = (ECX >> 25) & 1;
1713   Features["rdrnd"]  = (ECX >> 30) & 1;
1714 
1715   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1716   // indicates that the AVX registers will be saved and restored on context
1717   // switch, then we have full AVX support.
1718   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1719   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1720 #if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
1724   bool HasAVX512Save = true;
1725 #else
1726   // AVX512 requires additional context to be saved by the OS.
1727   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1728 #endif
1729   // AMX requires additional context to be saved by the OS.
1730   const unsigned AMXBits = (1 << 17) | (1 << 18);
1731   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1732 
1733   Features["avx"]   = HasAVXSave;
1734   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1735   // Only enable XSAVE if OS has enabled support for saving YMM state.
1736   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1737   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1738 
1739   unsigned MaxExtLevel;
1740   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1741 
1742   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1743                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1744   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1745   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1746   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1747   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1748   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1749   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1750   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1751   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1752   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1753 
1754   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1755 
1756   // Miscellaneous memory related features, detected by
1757   // using the 0x80000008 leaf of the CPUID instruction
1758   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1759                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1760   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1761   Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1762   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1763 
1764   bool HasLeaf7 =
1765       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1766 
1767   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1768   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1769   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1770   // AVX2 is only supported if we have the OS save support from AVX.
1771   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1772   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1773   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1774   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1775   // AVX512 is only supported if the OS supports the context save for it.
1776   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1777   if (Features["avx512f"])
1778     Features["evex512"]  = true;
1779   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1780   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1781   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1782   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1783   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1784   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1785   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1786   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1787   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1788   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1789   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1790   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1791 
1792   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1793   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1794   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1795   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1796   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1797   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1798   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1799   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1800   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1801   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1802   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1803   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1804   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1805   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1806   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1807   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1808   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1809   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1810 
1811   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1812   Features["avx512vp2intersect"] =
1813       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1814   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1815   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1826   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1827   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1828   Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1829   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1830   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1831   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1832   // return all 0s for invalid subleaves so check the limit.
1833   bool HasLeaf7Subleaf1 =
1834       HasLeaf7 && EAX >= 1 &&
1835       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1836   Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1837   Features["sm3"]        = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1838   Features["sm4"]        = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1839   Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1840   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1841   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1842   Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1843   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1844   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1845   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1846   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1847   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1848   Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1849   Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1850   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1851   Features["usermsr"]  = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
1852   Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
1853 
1854   bool HasLeafD = MaxLevel >= 0xd &&
1855                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1856 
1857   // Only enable XSAVE if OS has enabled support for saving YMM state.
1858   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1859   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1860   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1861 
1862   bool HasLeaf14 = MaxLevel >= 0x14 &&
1863                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1864 
1865   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1866 
1867   bool HasLeaf19 =
1868       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1869   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1870 
1871   bool HasLeaf24 =
1872       MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1873   Features["avx10.1-512"] =
1874       Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
1875 
1876   return true;
1877 }
1878 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
getHostCPUFeatures(StringMap<bool> & Features)1879 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1880   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1881   if (!P)
1882     return false;
1883 
1884   SmallVector<StringRef, 32> Lines;
1885   P->getBuffer().split(Lines, "\n");
1886 
1887   SmallVector<StringRef, 32> CPUFeatures;
1888 
1889   // Look for the CPU features.
1890   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1891     if (Lines[I].starts_with("Features")) {
1892       Lines[I].split(CPUFeatures, ' ');
1893       break;
1894     }
1895 
1896 #if defined(__aarch64__)
1897   // Keep track of which crypto features we have seen
1898   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1899   uint32_t crypto = 0;
1900 #endif
1901 
1902   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1903     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1904 #if defined(__aarch64__)
1905                                    .Case("asimd", "neon")
1906                                    .Case("fp", "fp-armv8")
1907                                    .Case("crc32", "crc")
1908                                    .Case("atomics", "lse")
1909                                    .Case("sve", "sve")
1910                                    .Case("sve2", "sve2")
1911 #else
1912                                    .Case("half", "fp16")
1913                                    .Case("neon", "neon")
1914                                    .Case("vfpv3", "vfp3")
1915                                    .Case("vfpv3d16", "vfp3d16")
1916                                    .Case("vfpv4", "vfp4")
1917                                    .Case("idiva", "hwdiv-arm")
1918                                    .Case("idivt", "hwdiv")
1919 #endif
1920                                    .Default("");
1921 
1922 #if defined(__aarch64__)
1923     // We need to check crypto separately since we need all of the crypto
1924     // extensions to enable the subtarget feature
1925     if (CPUFeatures[I] == "aes")
1926       crypto |= CAP_AES;
1927     else if (CPUFeatures[I] == "pmull")
1928       crypto |= CAP_PMULL;
1929     else if (CPUFeatures[I] == "sha1")
1930       crypto |= CAP_SHA1;
1931     else if (CPUFeatures[I] == "sha2")
1932       crypto |= CAP_SHA2;
1933 #endif
1934 
1935     if (LLVMFeatureStr != "")
1936       Features[LLVMFeatureStr] = true;
1937   }
1938 
1939 #if defined(__aarch64__)
1940   // If we have all crypto bits we can add the feature
1941   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1942     Features["crypto"] = true;
1943 #endif
1944 
1945   return true;
1946 }
1947 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
getHostCPUFeatures(StringMap<bool> & Features)1948 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1949   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1950     Features["neon"] = true;
1951   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1952     Features["crc"] = true;
1953   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1954     Features["crypto"] = true;
1955 
1956   return true;
1957 }
1958 #elif defined(__linux__) && defined(__loongarch__)
1959 #include <sys/auxv.h>
getHostCPUFeatures(StringMap<bool> & Features)1960 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1961   unsigned long hwcap = getauxval(AT_HWCAP);
1962   bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1963   uint32_t cpucfg2 = 0x2;
1964   __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1965 
1966   Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1967   Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1968 
1969   Features["lsx"] = hwcap & (1UL << 4);  // HWCAP_LOONGARCH_LSX
1970   Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1971   Features["lvz"] = hwcap & (1UL << 9);  // HWCAP_LOONGARCH_LVZ
1972 
1973   return true;
1974 }
1975 #else
getHostCPUFeatures(StringMap<bool> & Features)1976 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1977 #endif
1978 
1979 #if __APPLE__
/// \returns a copy of \p T with its architecture replaced by the one this
/// translation unit was compiled for, as identified by the compiler's
/// predefined architecture macros.
///
/// Both the parsed architecture (via setArch) and the spelled architecture
/// name (via setArchName) are updated. Compilation fails with an \c #error for
/// any architecture that is not listed below.
static Triple withHostArch(Triple T) {
  // NOTE(review): the branch order looks deliberate -- __arm64e__ is tested
  // before __aarch64__ and __x86_64h__ before __x86_64__, presumably because
  // the more specific macro is defined alongside the general one; confirm
  // before reordering.
#if defined(__arm__)
  T.setArch(Triple::arm);
  T.setArchName("arm");
#elif defined(__arm64e__)
  T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
  T.setArchName("arm64e");
#elif defined(__aarch64__)
  T.setArch(Triple::aarch64);
  T.setArchName("arm64");
#elif defined(__x86_64h__)
  T.setArch(Triple::x86_64);
  T.setArchName("x86_64h");
#elif defined(__x86_64__)
  T.setArch(Triple::x86_64);
  T.setArchName("x86_64");
#elif defined(__i386__)
  T.setArch(Triple::x86);
  T.setArchName("i386");
#elif defined(__powerpc__)
  T.setArch(Triple::ppc);
  T.setArchName("powerpc");
#else
#  error "Unimplemented host arch fixup"
#endif
  return T;
}
2008 #endif
2009 
getProcessTriple()2010 std::string sys::getProcessTriple() {
2011   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
2012   Triple PT(Triple::normalize(TargetTripleString));
2013 
2014 #if __APPLE__
2015   /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
2016   /// the slices. This fixes that up.
2017   PT = withHostArch(PT);
2018 #endif
2019 
2020   if (sizeof(void *) == 8 && PT.isArch32Bit())
2021     PT = PT.get64BitArchVariant();
2022   if (sizeof(void *) == 4 && PT.isArch64Bit())
2023     PT = PT.get32BitArchVariant();
2024 
2025   return PT.str();
2026 }
2027 
printDefaultTargetAndDetectedCPU(raw_ostream & OS)2028 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
2029 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
2030   std::string CPU = std::string(sys::getHostCPUName());
2031   if (CPU == "generic")
2032     CPU = "(unknown)";
2033   OS << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
2034      << "  Host CPU: " << CPU << '\n';
2035 #endif
2036 }
2037