xref: /openbsd/gnu/llvm/llvm/lib/Target/X86/X86Subtarget.h (revision a96b3639)
1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the X86 specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
14 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
15 
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrInfo.h"
19 #include "X86SelectionDAGInfo.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/CodeGen/TargetSubtargetInfo.h"
22 #include "llvm/IR/CallingConv.h"
23 #include <climits>
24 #include <memory>
25 
26 #define GET_SUBTARGETINFO_HEADER
27 #include "X86GenSubtargetInfo.inc"
28 
29 namespace llvm {
30 
31 class CallLowering;
32 class GlobalValue;
33 class InstructionSelector;
34 class LegalizerInfo;
35 class RegisterBankInfo;
36 class StringRef;
37 class TargetMachine;
38 
/// Enumerates the position-independent-code styles the X86 backend can emit.
namespace PICStyles {

enum class Style {
  StubPIC, // i386-darwin in PIC mode.
  GOT,     // 32-bit ELF targets in PIC mode.
  RIPRel,  // x86-64 in PIC mode.
  None     // Not compiling position-independent code.
};

} // end namespace PICStyles
51 
52 class X86Subtarget final : public X86GenSubtargetInfo {
53   enum X86SSEEnum {
54     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
55   };
56 
57   enum X863DNowEnum {
58     NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
59   };
60 
61   /// Which PIC style to use
62   PICStyles::Style PICStyle;
63 
64   const TargetMachine &TM;
65 
66   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
67   X86SSEEnum X86SSELevel = NoSSE;
68 
69   /// MMX, 3DNow, 3DNow Athlon, or none supported.
70   X863DNowEnum X863DNowLevel = NoThreeDNow;
71 
72 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
73   bool ATTRIBUTE = DEFAULT;
74 #include "X86GenSubtargetInfo.inc"
75   /// The minimum alignment known to hold of the stack frame on
76   /// entry to the function and which must be maintained by every function.
77   Align stackAlignment = Align(4);
78 
79   Align TileConfigAlignment = Align(4);
80 
81   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
82   ///
83   // FIXME: this is a known good value for Yonah. How about others?
84   unsigned MaxInlineSizeThreshold = 128;
85 
86   /// What processor and OS we're targeting.
87   Triple TargetTriple;
88 
89   /// GlobalISel related APIs.
90   std::unique_ptr<CallLowering> CallLoweringInfo;
91   std::unique_ptr<LegalizerInfo> Legalizer;
92   std::unique_ptr<RegisterBankInfo> RegBankInfo;
93   std::unique_ptr<InstructionSelector> InstSelector;
94 
95   /// Override the stack alignment.
96   MaybeAlign StackAlignOverride;
97 
98   /// Preferred vector width from function attribute.
99   unsigned PreferVectorWidthOverride;
100 
101   /// Resolved preferred vector width from function attribute and subtarget
102   /// features.
103   unsigned PreferVectorWidth = UINT32_MAX;
104 
105   /// Required vector width from function attribute.
106   unsigned RequiredVectorWidth;
107 
108   X86SelectionDAGInfo TSInfo;
109   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
110   // X86TargetLowering needs.
111   X86InstrInfo InstrInfo;
112   X86TargetLowering TLInfo;
113   X86FrameLowering FrameLowering;
114 
115 public:
116   /// This constructor initializes the data members to match that
117   /// of the specified triple.
118   ///
119   X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
120                const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
121                unsigned PreferVectorWidthOverride,
122                unsigned RequiredVectorWidth);
123 
getTargetLowering()124   const X86TargetLowering *getTargetLowering() const override {
125     return &TLInfo;
126   }
127 
getInstrInfo()128   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
129 
getFrameLowering()130   const X86FrameLowering *getFrameLowering() const override {
131     return &FrameLowering;
132   }
133 
getSelectionDAGInfo()134   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
135     return &TSInfo;
136   }
137 
getRegisterInfo()138   const X86RegisterInfo *getRegisterInfo() const override {
139     return &getInstrInfo()->getRegisterInfo();
140   }
141 
getSaveArgs()142   bool getSaveArgs() const { return SaveArgs; }
143 
getTileConfigSize()144   unsigned getTileConfigSize() const { return 64; }
getTileConfigAlignment()145   Align getTileConfigAlignment() const { return TileConfigAlignment; }
146 
147   /// Returns the minimum alignment known to hold of the
148   /// stack frame on entry to the function and which must be maintained by every
149   /// function for this subtarget.
getStackAlignment()150   Align getStackAlignment() const { return stackAlignment; }
151 
152   /// Returns the maximum memset / memcpy size
153   /// that still makes it profitable to inline the call.
getMaxInlineSizeThreshold()154   unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
155 
156   /// ParseSubtargetFeatures - Parses features string setting specified
157   /// subtarget options.  Definition of function is auto generated by tblgen.
158   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
159 
160   /// Methods used by Global ISel
161   const CallLowering *getCallLowering() const override;
162   InstructionSelector *getInstructionSelector() const override;
163   const LegalizerInfo *getLegalizerInfo() const override;
164   const RegisterBankInfo *getRegBankInfo() const override;
165 
166 private:
167   /// Initialize the full set of dependencies so we can use an initializer
168   /// list for X86Subtarget.
169   X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
170                                                 StringRef TuneCPU,
171                                                 StringRef FS);
172   void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
173 
174 public:
175 
176 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
177   bool GETTER() const { return ATTRIBUTE; }
178 #include "X86GenSubtargetInfo.inc"
179 
180   /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
isTarget64BitILP32()181   bool isTarget64BitILP32() const {
182     return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
183   }
184 
185   /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
isTarget64BitLP64()186   bool isTarget64BitLP64() const {
187     return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
188   }
189 
getPICStyle()190   PICStyles::Style getPICStyle() const { return PICStyle; }
setPICStyle(PICStyles::Style Style)191   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
192 
canUseCMPXCHG8B()193   bool canUseCMPXCHG8B() const { return hasCX8(); }
canUseCMPXCHG16B()194   bool canUseCMPXCHG16B() const {
195     // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
196     return hasCX16() && is64Bit();
197   }
198   // SSE codegen depends on cmovs, and all SSE1+ processors support them.
199   // All 64-bit processors support cmov.
canUseCMOV()200   bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
hasSSE1()201   bool hasSSE1() const { return X86SSELevel >= SSE1; }
hasSSE2()202   bool hasSSE2() const { return X86SSELevel >= SSE2; }
hasSSE3()203   bool hasSSE3() const { return X86SSELevel >= SSE3; }
hasSSSE3()204   bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
hasSSE41()205   bool hasSSE41() const { return X86SSELevel >= SSE41; }
hasSSE42()206   bool hasSSE42() const { return X86SSELevel >= SSE42; }
hasAVX()207   bool hasAVX() const { return X86SSELevel >= AVX; }
hasAVX2()208   bool hasAVX2() const { return X86SSELevel >= AVX2; }
hasAVX512()209   bool hasAVX512() const { return X86SSELevel >= AVX512; }
hasInt256()210   bool hasInt256() const { return hasAVX2(); }
hasMMX()211   bool hasMMX() const { return X863DNowLevel >= MMX; }
hasThreeDNow()212   bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
hasThreeDNowA()213   bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
hasAnyFMA()214   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
hasPrefetchW()215   bool hasPrefetchW() const {
216     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
217     // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
218     // it and KNL has another that prefetches to L2 cache. We assume the
219     // L1 version exists if the L2 version does.
220     return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
221   }
hasSSEPrefetch()222   bool hasSSEPrefetch() const {
223     // We implicitly enable these when we have a write prefix supporting cache
224     // level OR if we have prfchw, but don't already have a read prefetch from
225     // 3dnow.
226     return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
227            hasPREFETCHI();
228   }
canUseLAHFSAHF()229   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
230   // These are generic getters that OR together all of the thunk types
231   // supported by the subtarget. Therefore useIndirectThunk*() will return true
232   // if any respective thunk feature is enabled.
useIndirectThunkCalls()233   bool useIndirectThunkCalls() const {
234     return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
235   }
useIndirectThunkBranches()236   bool useIndirectThunkBranches() const {
237     return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
238   }
239 
getPreferVectorWidth()240   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
getRequiredVectorWidth()241   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
242 
243   // Helper functions to determine when we should allow widening to 512-bit
244   // during codegen.
245   // TODO: Currently we're always allowing widening on CPUs without VLX,
246   // because for many cases we don't have a better option.
canExtendTo512DQ()247   bool canExtendTo512DQ() const {
248     return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
249   }
canExtendTo512BW()250   bool canExtendTo512BW() const  {
251     return hasBWI() && canExtendTo512DQ();
252   }
253 
254   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
255   // disable them in the legalizer.
useAVX512Regs()256   bool useAVX512Regs() const {
257     return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
258   }
259 
useLight256BitInstructions()260   bool useLight256BitInstructions() const {
261     return getPreferVectorWidth() >= 256 || AllowLight256Bit;
262   }
263 
useBWIRegs()264   bool useBWIRegs() const {
265     return hasBWI() && useAVX512Regs();
266   }
267 
isXRaySupported()268   bool isXRaySupported() const override { return is64Bit(); }
269 
270   /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
271   /// no-sse2). There isn't any reason to disable it if the target processor
272   /// supports it.
hasCLFLUSH()273   bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
274 
275   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
276   /// no-sse2). There isn't any reason to disable it if the target processor
277   /// supports it.
hasMFence()278   bool hasMFence() const { return hasSSE2() || is64Bit(); }
279 
getTargetTriple()280   const Triple &getTargetTriple() const { return TargetTriple; }
281 
isTargetDarwin()282   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
isTargetFreeBSD()283   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
isTargetOpenBSD()284   bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }
isTargetDragonFly()285   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
isTargetSolaris()286   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
isTargetPS()287   bool isTargetPS() const { return TargetTriple.isPS(); }
288 
isTargetELF()289   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
isTargetCOFF()290   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
isTargetMachO()291   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
292 
isTargetLinux()293   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
isTargetKFreeBSD()294   bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
isTargetGlibc()295   bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
isTargetAndroid()296   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
isTargetNaCl()297   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
isTargetNaCl32()298   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
isTargetNaCl64()299   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
isTargetMCU()300   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
isTargetFuchsia()301   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
302 
isTargetWindowsMSVC()303   bool isTargetWindowsMSVC() const {
304     return TargetTriple.isWindowsMSVCEnvironment();
305   }
306 
isTargetWindowsCoreCLR()307   bool isTargetWindowsCoreCLR() const {
308     return TargetTriple.isWindowsCoreCLREnvironment();
309   }
310 
isTargetWindowsCygwin()311   bool isTargetWindowsCygwin() const {
312     return TargetTriple.isWindowsCygwinEnvironment();
313   }
314 
isTargetWindowsGNU()315   bool isTargetWindowsGNU() const {
316     return TargetTriple.isWindowsGNUEnvironment();
317   }
318 
isTargetWindowsItanium()319   bool isTargetWindowsItanium() const {
320     return TargetTriple.isWindowsItaniumEnvironment();
321   }
322 
isTargetCygMing()323   bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
324 
isOSWindows()325   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
326 
isTargetWin64()327   bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
328 
isTargetWin32()329   bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
330 
isPICStyleGOT()331   bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
isPICStyleRIPRel()332   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
333 
isPICStyleStubPIC()334   bool isPICStyleStubPIC() const {
335     return PICStyle == PICStyles::Style::StubPIC;
336   }
337 
338   bool isPositionIndependent() const;
339 
isCallingConvWin64(CallingConv::ID CC)340   bool isCallingConvWin64(CallingConv::ID CC) const {
341     switch (CC) {
342     // On Win64, all these conventions just use the default convention.
343     case CallingConv::C:
344     case CallingConv::Fast:
345     case CallingConv::Tail:
346     case CallingConv::Swift:
347     case CallingConv::SwiftTail:
348     case CallingConv::X86_FastCall:
349     case CallingConv::X86_StdCall:
350     case CallingConv::X86_ThisCall:
351     case CallingConv::X86_VectorCall:
352     case CallingConv::Intel_OCL_BI:
353       return isTargetWin64();
354     // This convention allows using the Win64 convention on other targets.
355     case CallingConv::Win64:
356       return true;
357     // This convention allows using the SysV convention on Windows targets.
358     case CallingConv::X86_64_SysV:
359       return false;
360     // Otherwise, who knows what this is.
361     default:
362       return false;
363     }
364   }
365 
366   /// Classify a global variable reference for the current subtarget according
367   /// to how we should reference it in a non-pcrel context.
368   unsigned char classifyLocalReference(const GlobalValue *GV) const;
369 
370   unsigned char classifyGlobalReference(const GlobalValue *GV,
371                                         const Module &M) const;
372   unsigned char classifyGlobalReference(const GlobalValue *GV) const;
373 
374   /// Classify a global function reference for the current subtarget.
375   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
376                                                 const Module &M) const;
377   unsigned char
378   classifyGlobalFunctionReference(const GlobalValue *GV) const override;
379 
380   /// Classify a blockaddress reference for the current subtarget according to
381   /// how we should reference it in a non-pcrel context.
382   unsigned char classifyBlockAddressReference() const;
383 
384   /// Return true if the subtarget allows calls to immediate address.
385   bool isLegalToCallImmediateAddr() const;
386 
387   /// Return whether FrameLowering should always set the "extended frame
388   /// present" bit in FP, or set it based on a symbol in the runtime.
swiftAsyncContextIsDynamicallySet()389   bool swiftAsyncContextIsDynamicallySet() const {
390     // Older OS versions (particularly system unwinders) are confused by the
391     // Swift extended frame, so when building code that might be run on them we
392     // must dynamically query the concurrency library to determine whether
393     // extended frames should be flagged as present.
394     const Triple &TT = getTargetTriple();
395 
396     unsigned Major = TT.getOSVersion().getMajor();
397     switch(TT.getOS()) {
398     default:
399       return false;
400     case Triple::IOS:
401     case Triple::TvOS:
402       return Major < 15;
403     case Triple::WatchOS:
404       return Major < 8;
405     case Triple::MacOSX:
406     case Triple::Darwin:
407       return Major < 12;
408     }
409   }
410 
411   /// If we are using indirect thunks, we need to expand indirectbr to avoid it
412   /// lowering to an actual indirect jump.
enableIndirectBrExpand()413   bool enableIndirectBrExpand() const override {
414     return useIndirectThunkBranches();
415   }
416 
417   /// Enable the MachineScheduler pass for all X86 subtargets.
enableMachineScheduler()418   bool enableMachineScheduler() const override { return true; }
419 
420   bool enableEarlyIfConversion() const override;
421 
422   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
423                               &Mutations) const override;
424 
getAntiDepBreakMode()425   AntiDepBreakMode getAntiDepBreakMode() const override {
426     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
427   }
428 };
429 
430 } // end namespace llvm
431 
432 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
433