1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the X86 specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
14 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
15 
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrInfo.h"
19 #include "X86SelectionDAGInfo.h"
20 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21 #include "llvm/CodeGen/TargetSubtargetInfo.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/TargetParser/Triple.h"
24 #include <climits>
25 #include <memory>
26 
27 #define GET_SUBTARGETINFO_HEADER
28 #include "X86GenSubtargetInfo.inc"
29 
30 namespace llvm {
31 
32 class CallLowering;
33 class GlobalValue;
34 class InstructionSelector;
35 class LegalizerInfo;
36 class RegisterBankInfo;
37 class StringRef;
38 class TargetMachine;
39 
40 /// The X86 backend supports a number of different styles of PIC.
41 ///
namespace PICStyles {

/// Enumerates the position-independent-code strategies the X86 backend can
/// emit; the active style is selected per subtarget via setPICStyle().
enum class Style {
  StubPIC,          // Used on i386-darwin in pic mode.
  GOT,              // Used on 32 bit elf when in pic mode.
  RIPRel,           // Used on X86-64 when in pic mode.
  None              // Set when not in pic mode.
};

} // end namespace PICStyles
52 
/// X86-specific subclass of TargetSubtargetInfo. It records the selected
/// CPU/feature configuration and target triple, and owns the per-subtarget
/// codegen helper objects (instruction info, DAG lowering, frame lowering,
/// selection-DAG info, and the GlobalISel pipeline objects).
class X86Subtarget final : public X86GenSubtargetInfo {
  /// Ordered SSE/AVX capability levels. Each level implies all earlier ones;
  /// the hasSSE*/hasAVX* predicates below rely on this ordering via >=.
  enum X86SSEEnum {
    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  };

  /// Ordered MMX/3DNow! capability levels; each level implies the earlier
  /// ones (hasMMX/hasThreeDNow* compare with >=).
  enum X863DNowEnum {
    NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
  };

  /// Which PIC style to use
  PICStyles::Style PICStyle;

  /// The target machine this subtarget was constructed for.
  const TargetMachine &TM;

  /// Highest supported vector ISA level: SSE1 through SSE42, AVX, AVX2,
  /// AVX512, or NoSSE.
  X86SSEEnum X86SSELevel = NoSSE;

  /// MMX, 3DNow, 3DNow Athlon, or none supported.
  X863DNowEnum X863DNowLevel = NoThreeDNow;

  // Declare one boolean member per tablegen-defined subtarget feature,
  // each initialized to its tablegen-specified default.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "X86GenSubtargetInfo.inc"
  /// The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  Align stackAlignment = Align(4);

  /// Alignment used for the AMX tile configuration data.
  Align TileConfigAlignment = Align(4);

  /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  ///
  // FIXME: this is a known good value for Yonah. How about others?
  unsigned MaxInlineSizeThreshold = 128;

  /// What processor and OS we're targeting.
  Triple TargetTriple;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  std::unique_ptr<InstructionSelector> InstSelector;

  /// Override the stack alignment.
  MaybeAlign StackAlignOverride;

  /// Preferred vector width from function attribute.
  unsigned PreferVectorWidthOverride;

  /// Resolved preferred vector width from function attribute and subtarget
  /// features.
  unsigned PreferVectorWidth = UINT32_MAX;

  /// Required vector width from function attribute.
  unsigned RequiredVectorWidth;

  X86SelectionDAGInfo TSInfo;
  // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  // X86TargetLowering needs.
  X86InstrInfo InstrInfo;
  X86TargetLowering TLInfo;
  X86FrameLowering FrameLowering;

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
  X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
               const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
               unsigned PreferVectorWidthOverride,
               unsigned RequiredVectorWidth);

  /// Returns the target-specific DAG lowering object.
  const X86TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  /// Returns the X86 instruction information object.
  const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  /// Returns the X86 frame lowering object.
  const X86FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  /// Returns the X86 selection-DAG info object.
  const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  /// Returns the register information, which is owned by InstrInfo.
  const X86RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }

  /// Size in bytes of the tile configuration data (fixed at 64).
  unsigned getTileConfigSize() const { return 64; }
  /// Alignment required for the tile configuration data.
  Align getTileConfigAlignment() const { return TileConfigAlignment; }

  /// Returns the minimum alignment known to hold of the
  /// stack frame on entry to the function and which must be maintained by every
  /// function for this subtarget.
  Align getStackAlignment() const { return stackAlignment; }

  /// Returns the maximum memset / memcpy size
  /// that still makes it profitable to inline the call.
  unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options.  Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// Methods used by Global ISel
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

private:
  /// Initialize the full set of dependencies so we can use an initializer
  /// list for X86Subtarget.
  X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
                                                StringRef TuneCPU,
                                                StringRef FS);
  /// Derive the remaining feature state (SSE level, stack alignment, ...)
  /// from the CPU name and feature string.
  void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

public:

  // Declare one const getter per tablegen-defined subtarget feature,
  // mirroring the boolean members declared above.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "X86GenSubtargetInfo.inc"

  /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  bool isTarget64BitILP32() const {
    return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  }

  /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  bool isTarget64BitLP64() const {
    return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  }

  /// Returns the PIC style currently in effect.
  PICStyles::Style getPICStyle() const { return PICStyle; }
  /// Sets the PIC style to use for code generation.
  void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }

  /// CMPXCHG8B is available whenever the CX8 CPUID feature is set.
  bool canUseCMPXCHG8B() const { return hasCX8(); }
  bool canUseCMPXCHG16B() const {
    // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
    return hasCX16() && is64Bit();
  }
  // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  // All 64-bit processors support cmov.
  bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
  // SSE/AVX level predicates: each returns true if the subtarget supports at
  // least that level (levels are cumulative, see X86SSEEnum).
  bool hasSSE1() const { return X86SSELevel >= SSE1; }
  bool hasSSE2() const { return X86SSELevel >= SSE2; }
  bool hasSSE3() const { return X86SSELevel >= SSE3; }
  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  bool hasSSE41() const { return X86SSELevel >= SSE41; }
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512; }
  /// True when 256-bit integer operations are available (i.e. AVX2).
  bool hasInt256() const { return hasAVX2(); }
  bool hasMMX() const { return X863DNowLevel >= MMX; }
  bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
  bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
  /// True if either FMA3 or FMA4 is available.
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasPrefetchW() const {
    // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
    // it and KNL has another that prefetches to L2 cache. We assume the
    // L1 version exists if the L2 version does.
    return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
  }
  bool hasSSEPrefetch() const {
    // We implicitly enable these when we have a write prefix supporting cache
    // level OR if we have prfchw, but don't already have a read prefetch from
    // 3dnow.
    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
           hasPREFETCHI();
  }
  /// LAHF/SAHF are always usable in 32-bit mode; in 64-bit mode they need
  /// the LAHFSAHF64 feature bit.
  bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
  // These are generic getters that OR together all of the thunk types
  // supported by the subtarget. Therefore useIndirectThunk*() will return true
  // if any respective thunk feature is enabled.
  bool useIndirectThunkCalls() const {
    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  }
  bool useIndirectThunkBranches() const {
    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  }

  /// Preferred vector width, resolved from the function attribute and
  /// subtarget features (UINT32_MAX when unconstrained).
  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  /// Required vector width from the function attribute.
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }

  // Helper functions to determine when we should allow widening to 512-bit
  // during codegen.
  // TODO: Currently we're always allowing widening on CPUs without VLX,
  // because for many cases we don't have a better option.
  bool canExtendTo512DQ() const {
    return hasAVX512() && hasEVEX512() &&
           (!hasVLX() || getPreferVectorWidth() >= 512);
  }
  bool canExtendTo512BW() const  {
    return hasBWI() && canExtendTo512DQ();
  }

  // Domain-delay predicates: NoDomainDelay* feature bits indicate the CPU has
  // no cross-domain bypass penalty, either globally or for the specific
  // instruction class.
  bool hasNoDomainDelay() const { return NoDomainDelay; }
  bool hasNoDomainDelayMov() const {
      return hasNoDomainDelay() || NoDomainDelayMov;
  }
  bool hasNoDomainDelayBlend() const {
      return hasNoDomainDelay() || NoDomainDelayBlend;
  }
  bool hasNoDomainDelayShuffle() const {
      return hasNoDomainDelay() || NoDomainDelayShuffle;
  }

  // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  // disable them in the legalizer.
  bool useAVX512Regs() const {
    return hasAVX512() && hasEVEX512() &&
           (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }

  /// True when 256-bit instructions are considered cheap enough to use, either
  /// because the preferred vector width allows them or the AllowLight256Bit
  /// feature is set.
  bool useLight256BitInstructions() const {
    return getPreferVectorWidth() >= 256 || AllowLight256Bit;
  }

  /// True when 512-bit BWI (byte/word) registers may be used.
  bool useBWIRegs() const {
    return hasBWI() && useAVX512Regs();
  }

  /// XRay instrumentation is only supported in 64-bit mode on X86.
  bool isXRaySupported() const override { return is64Bit(); }

  /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }

  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasMFence() const { return hasSSE2() || is64Bit(); }

  /// Returns the triple (arch/vendor/OS/environment) being targeted.
  const Triple &getTargetTriple() const { return TargetTriple; }

  // OS / object-format predicates, all derived from the target triple.
  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  bool isTargetPS() const { return TargetTriple.isPS(); }

  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }

  // Windows environment predicates, also derived from the triple.
  bool isTargetWindowsMSVC() const {
    return TargetTriple.isWindowsMSVCEnvironment();
  }

  bool isTargetWindowsCoreCLR() const {
    return TargetTriple.isWindowsCoreCLREnvironment();
  }

  bool isTargetWindowsCygwin() const {
    return TargetTriple.isWindowsCygwinEnvironment();
  }

  bool isTargetWindowsGNU() const {
    return TargetTriple.isWindowsGNUEnvironment();
  }

  bool isTargetWindowsItanium() const {
    return TargetTriple.isWindowsItaniumEnvironment();
  }

  bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }

  bool isOSWindows() const { return TargetTriple.isOSWindows(); }

  bool isTargetWin64() const { return Is64Bit && isOSWindows(); }

  bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }

  // PIC-style predicates (see PICStyles::Style for the meaning of each).
  bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }

  bool isPICStyleStubPIC() const {
    return PICStyle == PICStyles::Style::StubPIC;
  }

  /// True if the target machine is building position-independent code.
  bool isPositionIndependent() const;

  /// Returns true if \p CC uses the Win64 calling convention on this target.
  bool isCallingConvWin64(CallingConv::ID CC) const {
    switch (CC) {
    // On Win64, all these conventions just use the default convention.
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
    case CallingConv::X86_FastCall:
    case CallingConv::X86_StdCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::X86_VectorCall:
    case CallingConv::Intel_OCL_BI:
      return isTargetWin64();
    // This convention allows using the Win64 convention on other targets.
    case CallingConv::Win64:
      return true;
    // This convention allows using the SysV convention on Windows targets.
    case CallingConv::X86_64_SysV:
      return false;
    // Otherwise, who knows what this is.
    default:
      return false;
    }
  }

  /// Classify a global variable reference for the current subtarget according
  /// to how we should reference it in a non-pcrel context.
  unsigned char classifyLocalReference(const GlobalValue *GV) const;

  unsigned char classifyGlobalReference(const GlobalValue *GV,
                                        const Module &M) const;
  unsigned char classifyGlobalReference(const GlobalValue *GV) const;

  /// Classify a global function reference for the current subtarget.
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
                                                const Module &M) const;
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override;

  /// Classify a blockaddress reference for the current subtarget according to
  /// how we should reference it in a non-pcrel context.
  unsigned char classifyBlockAddressReference() const;

  /// Return true if the subtarget allows calls to immediate address.
  bool isLegalToCallImmediateAddr() const;

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch(TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    case Triple::MacOSX:
    case Triple::Darwin:
      return Major < 12;
    }
  }

  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  /// lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useIndirectThunkBranches();
  }

  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }

  /// Whether early if-conversion is enabled (defined out of line).
  bool enableEarlyIfConversion() const override;

  /// Append X86-specific post-RA scheduling DAG mutations to \p Mutations.
  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                              &Mutations) const override;

  /// Break anti-dependencies along the critical path in post-RA scheduling.
  AntiDepBreakMode getAntiDepBreakMode() const override {
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  }
};
440 
441 } // end namespace llvm
442 
443 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
444