//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer.  These registers must
  // not conflict with any ABI requirements.  For example, in 32-bit mode PIC
  // requires the GOT pointer to be in EBX before function calls via the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
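  // SEH unwind info identifies registers by their hardware encoding, which is
  // what getEncodingValue returns.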
  return getEncodingValue(i);
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX.  This class is only used after
  // extracting sub_8bit_hi sub-registers.  The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow register
  // class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
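  // Walk RC and its super-classes and return the first class that is legal on
  // this subtarget and has the same spill size as RC.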
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero. Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
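  // Copies in and out of the flags register (CCR) have to go through a
  // general-purpose register of the native word size.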
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
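  // These are heuristic per-class pressure limits; reserving a frame pointer
  // leaves one fewer GPR available.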
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the no_caller_saved_registers attribute, use the
  // X86_INTR calling convention because it has the CSR list we need.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the no_callee_saved_registers attribute is specified, override the CSRs
  // normally specified by the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_SaveList;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_RegMask;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported "
        "with this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clear other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
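  // The stack pointer cannot be used to address the local frame when the frame
  // contains variable-sized objects or when the stack pointer is adjusted
  // opaquely, e.g. by stack-adjusting inline assembly.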
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer.  MS inline asm can
  // reference locals while also adjusting the stack pointer.  When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer.  If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which implicitly zero-extends into
  // the upper 32 bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location.  On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32 bits. This saves one byte per LEA since the 0x67 prefix
  // is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference.  Replace the
  // FrameIndex with base register.  Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum+3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
      (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
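  // Look for a caller-saved GPR that is not used by the return or tail-call
  // instruction at MBBI and can therefore be clobbered as a scratch register.
  // Returns 0 if no suitable register is found.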
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
      MachineOperand &MO = MBBI->getOperand(I);
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
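  // In ILP32 (x32) mode pointers are 32 bits wide, so report the 32-bit alias
  // of the frame register.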
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
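  // Determine the tile shape (rows and columns) for VirtReg: use the shape
  // cached in the VirtRegMap if present, otherwise derive it from the defining
  // instruction and cache it there.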
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  if (RC.getID() != X86::TILERegClassID)
    return BaseImplRetVal;

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
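    // Only hint PhysReg if it has not been assigned to a virtual register yet,
    // or if the virtual register currently assigned to it has the same tile
    // shape.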
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}
961