1 //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the X86 implementation of the TargetRegisterInfo class.
10 // This file is responsible for the frame pointer elimination optimization
11 // on X86.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86RegisterInfo.h"
16 #include "X86FrameLowering.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/CodeGen/LiveRegMatrix.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/TargetFrameLowering.h"
28 #include "llvm/CodeGen/TargetInstrInfo.h"
29 #include "llvm/CodeGen/TileShapeInfo.h"
30 #include "llvm/CodeGen/VirtRegMap.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Target/TargetOptions.h"
38 
39 using namespace llvm;
40 
41 #define GET_REGINFO_TARGET_DESC
42 #include "X86GenRegisterInfo.inc"
43 
44 static cl::opt<bool>
45 EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
46           cl::desc("Enable use of a base pointer for complex stack frames"));
47 
48 X86RegisterInfo::X86RegisterInfo(const Triple &TT)
49     : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
50                          X86_MC::getDwarfRegFlavour(TT, false),
51                          X86_MC::getDwarfRegFlavour(TT, true),
52                          (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
53   X86_MC::initLLVMToSEHAndCVRegMapping(this);
54 
55   // Cache some information.
56   Is64Bit = TT.isArch64Bit();
57   IsWin64 = Is64Bit && TT.isOSWindows();
58 
  // Use a callee-saved register as the base pointer.  These registers must
  // not conflict with any ABI requirements.  For example, in 32-bit mode PIC
  // requires the GOT pointer to be in EBX before function calls via the PLT.
62   if (Is64Bit) {
63     SlotSize = 8;
64     // This matches the simplified 32-bit pointer code in the data layout
65     // computation.
66     // FIXME: Should use the data layout?
67     bool Use64BitReg = !TT.isX32();
68     StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
69     FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
70     BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
71   } else {
72     SlotSize = 4;
73     StackPtr = X86::ESP;
74     FramePtr = X86::EBP;
75     BasePtr = X86::ESI;
76   }
77 }
78 
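// The SEH register number is simply the register's hardware encoding.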
79 int
80 X86RegisterInfo::getSEHRegNum(unsigned i) const {
81   return getEncodingValue(i);
82 }
83 
84 const TargetRegisterClass *
85 X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
86                                        unsigned Idx) const {
87   // The sub_8bit sub-register index is more constrained in 32-bit mode.
88   // It behaves just like the sub_8bit_hi index.
89   if (!Is64Bit && Idx == X86::sub_8bit)
90     Idx = X86::sub_8bit_hi;
91 
92   // Forward to TableGen's default version.
93   return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
94 }
95 
96 const TargetRegisterClass *
97 X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
98                                           const TargetRegisterClass *B,
99                                           unsigned SubIdx) const {
100   // The sub_8bit sub-register index is more constrained in 32-bit mode.
101   if (!Is64Bit && SubIdx == X86::sub_8bit) {
102     A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
103     if (!A)
104       return nullptr;
105   }
106   return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
107 }
108 
109 const TargetRegisterClass *
110 X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
111                                            const MachineFunction &MF) const {
112   // Don't allow super-classes of GR8_NOREX.  This class is only used after
113   // extracting sub_8bit_hi sub-registers.  The H sub-registers cannot be copied
114   // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
116   //
117   // The GR8_NOREX class is always used in a way that won't be constrained to a
118   // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
119   // full GR8 class.
120   if (RC == &X86::GR8_NOREXRegClass)
121     return RC;
122 
123   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
124 
125   const TargetRegisterClass *Super = RC;
126   TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
127   do {
128     switch (Super->getID()) {
129     case X86::FR32RegClassID:
130     case X86::FR64RegClassID:
131       // If AVX-512 isn't supported we should only inflate to these classes.
132       if (!Subtarget.hasAVX512() &&
133           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
134         return Super;
135       break;
136     case X86::VR128RegClassID:
137     case X86::VR256RegClassID:
138       // If VLX isn't supported we should only inflate to these classes.
139       if (!Subtarget.hasVLX() &&
140           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
141         return Super;
142       break;
143     case X86::VR128XRegClassID:
144     case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
146       if (Subtarget.hasVLX() &&
147           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
148         return Super;
149       break;
150     case X86::FR32XRegClassID:
151     case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
153       if (Subtarget.hasAVX512() &&
154           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
155         return Super;
156       break;
157     case X86::GR8RegClassID:
158     case X86::GR16RegClassID:
159     case X86::GR32RegClassID:
160     case X86::GR64RegClassID:
161     case X86::RFP32RegClassID:
162     case X86::RFP64RegClassID:
163     case X86::RFP80RegClassID:
164     case X86::VR512_0_15RegClassID:
165     case X86::VR512RegClassID:
166       // Don't return a super-class that would shrink the spill size.
167       // That can happen with the vector and float classes.
168       if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
169         return Super;
170     }
171     Super = *I++;
172   } while (Super);
173   return RC;
174 }
175 
176 const TargetRegisterClass *
177 X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
178                                     unsigned Kind) const {
179   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
180   switch (Kind) {
181   default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
182   case 0: // Normal GPRs.
183     if (Subtarget.isTarget64BitLP64())
184       return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
189     if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for address accesses as well.
192       const X86FrameLowering *TFI = getFrameLowering(MF);
193       return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
194                  ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
195                  : &X86::LOW32_ADDR_ACCESSRegClass;
196     }
197     return &X86::GR32RegClass;
198   case 1: // Normal GPRs except the stack pointer (for encoding reasons).
199     if (Subtarget.isTarget64BitLP64())
200       return &X86::GR64_NOSPRegClass;
201     // NOSP does not contain RIP, so no special case here.
202     return &X86::GR32_NOSPRegClass;
203   case 2: // NOREX GPRs.
204     if (Subtarget.isTarget64BitLP64())
205       return &X86::GR64_NOREXRegClass;
206     return &X86::GR32_NOREXRegClass;
207   case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
208     if (Subtarget.isTarget64BitLP64())
209       return &X86::GR64_NOREX_NOSPRegClass;
210     // NOSP does not contain RIP, so no special case here.
211     return &X86::GR32_NOREX_NOSPRegClass;
212   case 4: // Available for tailcall (not callee-saved GPRs).
213     return getGPRsForTailCall(MF);
214   }
215 }
216 
217 bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
218                                            unsigned DefSubReg,
219                                            const TargetRegisterClass *SrcRC,
220                                            unsigned SrcSubReg) const {
221   // Prevent rewriting a copy where the destination size is larger than the
222   // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
224   if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
225       SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
226     return false;
227 
228   return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
229                                                   SrcRC, SrcSubReg);
230 }
231 
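// Return the register class of GPRs that may hold a tail-call target, based on
// the function's calling convention and whether the target is 64-bit.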
232 const TargetRegisterClass *
233 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
234   const Function &F = MF.getFunction();
235   if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
236     return &X86::GR64_TCW64RegClass;
237   else if (Is64Bit)
238     return &X86::GR64_TCRegClass;
239 
240   bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
241   if (hasHipeCC)
242     return &X86::GR32RegClass;
243   return &X86::GR32_TCRegClass;
244 }
245 
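// EFLAGS (the CCR register class) cannot be copied directly between registers;
// such copies have to go through a general-purpose register instead.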
246 const TargetRegisterClass *
247 X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
248   if (RC == &X86::CCRRegClass) {
249     if (Is64Bit)
250       return &X86::GR64RegClass;
251     else
252       return &X86::GR32RegClass;
253   }
254   return RC;
255 }
256 
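// Return a rough limit on the number of registers from RC that can be live at
// once, accounting for the GPR reserved by the frame pointer when one is used.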
257 unsigned
258 X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
259                                      MachineFunction &MF) const {
260   const X86FrameLowering *TFI = getFrameLowering(MF);
261 
262   unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
263   switch (RC->getID()) {
264   default:
265     return 0;
266   case X86::GR32RegClassID:
267     return 4 - FPDiff;
268   case X86::GR64RegClassID:
269     return 12 - FPDiff;
270   case X86::VR128RegClassID:
271     return Is64Bit ? 10 : 4;
272   case X86::VR64RegClassID:
273     return 4;
274   }
275 }
276 
277 const MCPhysReg *
278 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
279   assert(MF && "MachineFunction required");
280 
281   const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
282   const Function &F = MF->getFunction();
283   bool HasSSE = Subtarget.hasSSE1();
284   bool HasAVX = Subtarget.hasAVX();
285   bool HasAVX512 = Subtarget.hasAVX512();
286   bool CallsEHReturn = MF->callsEHReturn();
287 
288   CallingConv::ID CC = F.getCallingConv();
289 
  // If the NoCallerSavedRegisters attribute is present, use the X86_INTR
  // calling convention because it has the CSR list we need.
292   if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
293     CC = CallingConv::X86_INTR;
294 
  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
297   if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
298     return CSR_NoRegs_SaveList;
299 
300   switch (CC) {
301   case CallingConv::GHC:
302   case CallingConv::HiPE:
303     return CSR_NoRegs_SaveList;
304   case CallingConv::AnyReg:
305     if (HasAVX)
306       return CSR_64_AllRegs_AVX_SaveList;
307     return CSR_64_AllRegs_SaveList;
308   case CallingConv::PreserveMost:
309     return CSR_64_RT_MostRegs_SaveList;
310   case CallingConv::PreserveAll:
311     if (HasAVX)
312       return CSR_64_RT_AllRegs_AVX_SaveList;
313     return CSR_64_RT_AllRegs_SaveList;
314   case CallingConv::CXX_FAST_TLS:
315     if (Is64Bit)
316       return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
317              CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
318     break;
319   case CallingConv::Intel_OCL_BI: {
320     if (HasAVX512 && IsWin64)
321       return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
322     if (HasAVX512 && Is64Bit)
323       return CSR_64_Intel_OCL_BI_AVX512_SaveList;
324     if (HasAVX && IsWin64)
325       return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
326     if (HasAVX && Is64Bit)
327       return CSR_64_Intel_OCL_BI_AVX_SaveList;
328     if (!HasAVX && !IsWin64 && Is64Bit)
329       return CSR_64_Intel_OCL_BI_SaveList;
330     break;
331   }
332   case CallingConv::X86_RegCall:
333     if (Is64Bit) {
334       if (IsWin64) {
335         return (HasSSE ? CSR_Win64_RegCall_SaveList :
336                          CSR_Win64_RegCall_NoSSE_SaveList);
337       } else {
338         return (HasSSE ? CSR_SysV64_RegCall_SaveList :
339                          CSR_SysV64_RegCall_NoSSE_SaveList);
340       }
341     } else {
342       return (HasSSE ? CSR_32_RegCall_SaveList :
343                        CSR_32_RegCall_NoSSE_SaveList);
344     }
345   case CallingConv::CFGuard_Check:
346     assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
347     return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
348                    : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
349   case CallingConv::Cold:
350     if (Is64Bit)
351       return CSR_64_MostRegs_SaveList;
352     break;
353   case CallingConv::Win64:
354     if (!HasSSE)
355       return CSR_Win64_NoSSE_SaveList;
356     return CSR_Win64_SaveList;
357   case CallingConv::SwiftTail:
358     if (!Is64Bit)
359       return CSR_32_SaveList;
360     return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
361   case CallingConv::X86_64_SysV:
362     if (CallsEHReturn)
363       return CSR_64EHRet_SaveList;
364     return CSR_64_SaveList;
365   case CallingConv::X86_INTR:
366     if (Is64Bit) {
367       if (HasAVX512)
368         return CSR_64_AllRegs_AVX512_SaveList;
369       if (HasAVX)
370         return CSR_64_AllRegs_AVX_SaveList;
371       if (HasSSE)
372         return CSR_64_AllRegs_SaveList;
373       return CSR_64_AllRegs_NoSSE_SaveList;
374     } else {
375       if (HasAVX512)
376         return CSR_32_AllRegs_AVX512_SaveList;
377       if (HasAVX)
378         return CSR_32_AllRegs_AVX_SaveList;
379       if (HasSSE)
380         return CSR_32_AllRegs_SSE_SaveList;
381       return CSR_32_AllRegs_SaveList;
382     }
383   default:
384     break;
385   }
386 
387   if (Is64Bit) {
388     bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
389                      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
390     if (IsSwiftCC)
391       return IsWin64 ? CSR_Win64_SwiftError_SaveList
392                      : CSR_64_SwiftError_SaveList;
393 
394     if (IsWin64)
395       return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
396     if (CallsEHReturn)
397       return CSR_64EHRet_SaveList;
398     return CSR_64_SaveList;
399   }
400 
401   return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
402 }
403 
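// For CXX_FAST_TLS functions using split CSR, these callee-saved registers are
// saved and restored via explicit copies rather than the normal spill code.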
404 const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
405     const MachineFunction *MF) const {
406   assert(MF && "Invalid MachineFunction pointer.");
407   if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
408       MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
409     return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
410   return nullptr;
411 }
412 
413 const uint32_t *
414 X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
415                                       CallingConv::ID CC) const {
416   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
417   bool HasSSE = Subtarget.hasSSE1();
418   bool HasAVX = Subtarget.hasAVX();
419   bool HasAVX512 = Subtarget.hasAVX512();
420 
421   switch (CC) {
422   case CallingConv::GHC:
423   case CallingConv::HiPE:
424     return CSR_NoRegs_RegMask;
425   case CallingConv::AnyReg:
426     if (HasAVX)
427       return CSR_64_AllRegs_AVX_RegMask;
428     return CSR_64_AllRegs_RegMask;
429   case CallingConv::PreserveMost:
430     return CSR_64_RT_MostRegs_RegMask;
431   case CallingConv::PreserveAll:
432     if (HasAVX)
433       return CSR_64_RT_AllRegs_AVX_RegMask;
434     return CSR_64_RT_AllRegs_RegMask;
435   case CallingConv::CXX_FAST_TLS:
436     if (Is64Bit)
437       return CSR_64_TLS_Darwin_RegMask;
438     break;
439   case CallingConv::Intel_OCL_BI: {
440     if (HasAVX512 && IsWin64)
441       return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
442     if (HasAVX512 && Is64Bit)
443       return CSR_64_Intel_OCL_BI_AVX512_RegMask;
444     if (HasAVX && IsWin64)
445       return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
446     if (HasAVX && Is64Bit)
447       return CSR_64_Intel_OCL_BI_AVX_RegMask;
448     if (!HasAVX && !IsWin64 && Is64Bit)
449       return CSR_64_Intel_OCL_BI_RegMask;
450     break;
451   }
452   case CallingConv::X86_RegCall:
453     if (Is64Bit) {
454       if (IsWin64) {
455         return (HasSSE ? CSR_Win64_RegCall_RegMask :
456                          CSR_Win64_RegCall_NoSSE_RegMask);
457       } else {
458         return (HasSSE ? CSR_SysV64_RegCall_RegMask :
459                          CSR_SysV64_RegCall_NoSSE_RegMask);
460       }
461     } else {
462       return (HasSSE ? CSR_32_RegCall_RegMask :
463                        CSR_32_RegCall_NoSSE_RegMask);
464     }
465   case CallingConv::CFGuard_Check:
466     assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
467     return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
468                    : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
469   case CallingConv::Cold:
470     if (Is64Bit)
471       return CSR_64_MostRegs_RegMask;
472     break;
473   case CallingConv::Win64:
474     return CSR_Win64_RegMask;
475   case CallingConv::SwiftTail:
476     if (!Is64Bit)
477       return CSR_32_RegMask;
478     return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
479   case CallingConv::X86_64_SysV:
480     return CSR_64_RegMask;
481   case CallingConv::X86_INTR:
482     if (Is64Bit) {
483       if (HasAVX512)
484         return CSR_64_AllRegs_AVX512_RegMask;
485       if (HasAVX)
486         return CSR_64_AllRegs_AVX_RegMask;
487       if (HasSSE)
488         return CSR_64_AllRegs_RegMask;
489       return CSR_64_AllRegs_NoSSE_RegMask;
490     } else {
491       if (HasAVX512)
492         return CSR_32_AllRegs_AVX512_RegMask;
493       if (HasAVX)
494         return CSR_32_AllRegs_AVX_RegMask;
495       if (HasSSE)
496         return CSR_32_AllRegs_SSE_RegMask;
497       return CSR_32_AllRegs_RegMask;
498     }
499   default:
500     break;
501   }
502 
503   // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
504   // callsEHReturn().
505   if (Is64Bit) {
506     const Function &F = MF.getFunction();
507     bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
508                      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
509     if (IsSwiftCC)
510       return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
511 
512     return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
513   }
514 
515   return CSR_32_RegMask;
516 }
517 
518 const uint32_t*
519 X86RegisterInfo::getNoPreservedMask() const {
520   return CSR_NoRegs_RegMask;
521 }
522 
523 const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
524   return CSR_64_TLS_Darwin_RegMask;
525 }
526 
527 BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
528   BitVector Reserved(getNumRegs());
529   const X86FrameLowering *TFI = getFrameLowering(MF);
530 
531   // Set the floating point control register as reserved.
532   Reserved.set(X86::FPCW);
533 
534   // Set the floating point status register as reserved.
535   Reserved.set(X86::FPSW);
536 
537   // Set the SIMD floating point control register as reserved.
538   Reserved.set(X86::MXCSR);
539 
540   // Set the stack-pointer register and its aliases as reserved.
541   for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
542     Reserved.set(SubReg);
543 
544   // Set the Shadow Stack Pointer as reserved.
545   Reserved.set(X86::SSP);
546 
547   // Set the instruction pointer register and its aliases as reserved.
548   for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
549     Reserved.set(SubReg);
550 
551   // Set the frame-pointer register and its aliases as reserved if needed.
552   if (TFI->hasFP(MF)) {
553     for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
554       Reserved.set(SubReg);
555   }
556 
557   // Set the base-pointer register and its aliases as reserved if needed.
558   if (hasBasePointer(MF)) {
559     CallingConv::ID CC = MF.getFunction().getCallingConv();
560     const uint32_t *RegMask = getCallPreservedMask(MF, CC);
561     if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in the presence of dynamic allocas is not "
        "supported with this calling convention.");
565 
566     Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
567     for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
568       Reserved.set(SubReg);
569   }
570 
571   // Mark the segment registers as reserved.
572   Reserved.set(X86::CS);
573   Reserved.set(X86::SS);
574   Reserved.set(X86::DS);
575   Reserved.set(X86::ES);
576   Reserved.set(X86::FS);
577   Reserved.set(X86::GS);
578 
579   // Mark the floating point stack registers as reserved.
580   for (unsigned n = 0; n != 8; ++n)
581     Reserved.set(X86::ST0 + n);
582 
583   // Reserve the registers that only exist in 64-bit mode.
584   if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are legacy 32-bit registers.
587     Reserved.set(X86::SIL);
588     Reserved.set(X86::DIL);
589     Reserved.set(X86::BPL);
590     Reserved.set(X86::SPL);
591     Reserved.set(X86::SIH);
592     Reserved.set(X86::DIH);
593     Reserved.set(X86::BPH);
594     Reserved.set(X86::SPH);
595 
596     for (unsigned n = 0; n != 8; ++n) {
597       // R8, R9, ...
598       for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
599         Reserved.set(*AI);
600 
601       // XMM8, XMM9, ...
602       for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
603         Reserved.set(*AI);
604     }
605   }
606   if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
607     for (unsigned n = 16; n != 32; ++n) {
608       for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
609         Reserved.set(*AI);
610     }
611   }
612 
613   assert(checkAllSuperRegsMarked(Reserved,
614                                  {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
615                                   X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
616   return Reserved;
617 }
618 
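// Return true if Reg, or a register aliasing it, may be used to pass function
// arguments under the function's calling convention.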
619 bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
620                                          MCRegister Reg) const {
621   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
622   const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
623   auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
624     return TRI.isSuperOrSubRegisterEq(RegA, RegB);
625   };
626 
627   if (!ST.is64Bit())
628     return llvm::any_of(
629                SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
630                [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
631            (ST.hasMMX() && X86::VR64RegClass.contains(Reg));
632 
633   CallingConv::ID CC = MF.getFunction().getCallingConv();
634 
635   if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
636     return true;
637 
638   if (llvm::any_of(
639           SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
640           [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
641     return true;
642 
643   if (CC != CallingConv::Win64 &&
644       llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
645                    [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
646     return true;
647 
648   if (ST.hasSSE1() &&
649       llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
650                                            X86::XMM3, X86::XMM4, X86::XMM5,
651                                            X86::XMM6, X86::XMM7},
652                    [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
653     return true;
654 
655   return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
656 }
657 
658 bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
659                                       MCRegister PhysReg) const {
660   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
661   const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
662 
663   // Stack pointer.
664   if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
665     return true;
666 
  // The frame pointer is also fixed if it is being used as the frame pointer.
668   const X86FrameLowering &TFI = *getFrameLowering(MF);
669   if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
670     return true;
671 
672   return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
673 }
674 
675 bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
676   return RC->getID() == X86::TILERegClassID;
677 }
678 
679 void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
680   // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
681   // because the calling convention defines the EFLAGS register as NOT
682   // preserved.
683   //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this, and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
687   assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
688          "EFLAGS are not live-out from a patchpoint.");
689 
690   // Also clean other registers that don't need preserving (IP).
691   for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
692     Mask[Reg / 32] &= ~(1U << (Reg % 32));
693 }
694 
695 //===----------------------------------------------------------------------===//
696 // Stack Frame Processing methods
697 //===----------------------------------------------------------------------===//
698 
699 static bool CantUseSP(const MachineFrameInfo &MFI) {
700   return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
701 }
702 
703 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
704   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // If we have a virtual register to reference arguments, we don't need a
  // base pointer.
707   if (X86FI->getStackPtrSaveMI() != nullptr)
708     return false;
709 
710   if (X86FI->hasPreallocatedCall())
711     return true;
712 
713   const MachineFrameInfo &MFI = MF.getFrameInfo();
714 
715   if (!EnableBasePointer)
716     return false;
717 
718   // When we need stack realignment, we can't address the stack from the frame
719   // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
720   // can't address variables from the stack pointer.  MS inline asm can
721   // reference locals while also adjusting the stack pointer.  When we can't
722   // use both the SP and the FP, we need a separate base pointer register.
723   bool CantUseFP = hasStackRealignment(MF);
724   return CantUseFP && CantUseSP(MFI);
725 }
726 
727 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
728   if (!TargetRegisterInfo::canRealignStack(MF))
729     return false;
730 
731   const MachineFrameInfo &MFI = MF.getFrameInfo();
732   const MachineRegisterInfo *MRI = &MF.getRegInfo();
733 
734   // Stack realignment requires a frame pointer.  If we already started
735   // register allocation with frame pointer elimination, it is too late now.
736   if (!MRI->canReserveReg(FramePtr))
737     return false;
738 
  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
741   if (CantUseSP(MFI))
742     return MRI->canReserveReg(BasePtr);
743   return true;
744 }
745 
746 bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
747   if (TargetRegisterInfo::shouldRealignStack(MF))
748     return true;
749 
750   return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
751 }
752 
753 // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
754 // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
755 // TODO: In this case we should be really trying first to entirely eliminate
756 // this instruction which is a plain copy.
757 static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
758   MachineInstr &MI = *II;
759   unsigned Opc = II->getOpcode();
760   // Check if this is a LEA of the form 'lea (%esp), %ebx'
761   if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
762       MI.getOperand(2).getImm() != 1 ||
763       MI.getOperand(3).getReg() != X86::NoRegister ||
764       MI.getOperand(4).getImm() != 0 ||
765       MI.getOperand(5).getReg() != X86::NoRegister)
766     return false;
767   Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which implicitly zeroes the upper
  // 32 bits of the super-register.
771   if (Opc == X86::LEA64_32r)
772     BasePtr = getX86SubSuperRegister(BasePtr, 32);
773   Register NewDestReg = MI.getOperand(0).getReg();
774   const X86InstrInfo *TII =
775       MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
776   TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
777                    MI.getOperand(1).isKill());
778   MI.eraseFromParent();
779   return true;
780 }
781 
782 static bool isFuncletReturnInstr(MachineInstr &MI) {
783   switch (MI.getOpcode()) {
784   case X86::CATCHRET:
785   case X86::CLEANUPRET:
786     return true;
787   default:
788     return false;
789   }
790   llvm_unreachable("impossible");
791 }
792 
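// Rewrite the frame-index operand of MI to the given base register and fold
// FIOffset into the instruction's displacement (or immediate, for
// LOCAL_ESCAPE, STACKMAP and PATCHPOINT).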
793 void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
794                                           unsigned FIOperandNum,
795                                           Register BaseReg,
796                                           int FIOffset) const {
797   MachineInstr &MI = *II;
798   unsigned Opc = MI.getOpcode();
799   if (Opc == TargetOpcode::LOCAL_ESCAPE) {
800     MachineOperand &FI = MI.getOperand(FIOperandNum);
801     FI.ChangeToImmediate(FIOffset);
802     return;
803   }
804 
805   MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
806 
807   // The frame index format for stackmaps and patchpoints is different from the
808   // X86 format. It only has a FI and an offset.
809   if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
810     assert(BasePtr == FramePtr && "Expected the FP as base register");
811     int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
812     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
813     return;
814   }
815 
816   if (MI.getOperand(FIOperandNum + 3).isImm()) {
817     // Offset is a 32-bit integer.
818     int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
819     int Offset = FIOffset + Imm;
820     assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
821            "Requesting 64-bit offset in 32-bit immediate!");
822     if (Offset != 0)
823       MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
824   } else {
825     // Offset is symbolic. This is extremely rare.
826     uint64_t Offset =
827         FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
828     MI.getOperand(FIOperandNum + 3).setOffset(Offset);
829   }
830 }
831 
832 bool
833 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
834                                      int SPAdj, unsigned FIOperandNum,
835                                      RegScavenger *RS) const {
836   MachineInstr &MI = *II;
837   MachineBasicBlock &MBB = *MI.getParent();
838   MachineFunction &MF = *MBB.getParent();
839   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
840   bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
841                                                : isFuncletReturnInstr(*MBBI);
842   const X86FrameLowering *TFI = getFrameLowering(MF);
843   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
844 
845   // Determine base register and offset.
846   int FIOffset;
847   Register BasePtr;
848   if (MI.isReturn()) {
849     assert((!hasStackRealignment(MF) ||
850             MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
851            "Return instruction can only reference SP relative frame objects");
852     FIOffset =
853         TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
854   } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
855     FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
856   } else {
857     FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
858   }
859 
860   // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
861   // simple FP case, and doesn't work with stack realignment. On 32-bit, the
862   // offset is from the traditional base pointer location.  On 64-bit, the
863   // offset is from the SP at the end of the prologue, not the FP location. This
864   // matches the behavior of llvm.frameaddress.
865   unsigned Opc = MI.getOpcode();
866   if (Opc == TargetOpcode::LOCAL_ESCAPE) {
867     MachineOperand &FI = MI.getOperand(FIOperandNum);
868     FI.ChangeToImmediate(FIOffset);
869     return false;
870   }
871 
  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use a full-size 64-bit
  // register as the source operand; the semantics are the same and the
  // destination is 32 bits. This saves one byte per LEA since the 0x67 prefix
  // is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
876   Register MachineBasePtr = BasePtr;
877   if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
878     MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
879 
880   // This must be part of a four operand memory reference.  Replace the
881   // FrameIndex with base register.  Add an offset to the offset.
882   MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
883 
884   if (BasePtr == StackPtr)
885     FIOffset += SPAdj;
886 
887   // The frame index format for stackmaps and patchpoints is different from the
888   // X86 format. It only has a FI and an offset.
889   if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
890     assert(BasePtr == FramePtr && "Expected the FP as base register");
891     int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
892     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
893     return false;
894   }
895 
896   if (MI.getOperand(FIOperandNum+3).isImm()) {
897     // Offset is a 32-bit integer.
898     int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
899     int Offset = FIOffset + Imm;
900     assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
901            "Requesting 64-bit offset in 32-bit immediate!");
902     if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
903       MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
904   } else {
905     // Offset is symbolic. This is extremely rare.
906     uint64_t Offset = FIOffset +
907       (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
908     MI.getOperand(FIOperandNum + 3).setOffset(Offset);
909   }
910   return false;
911 }
912 
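// Find a caller-saved GPR that is not used by the return or tail-call
// instruction at MBBI and can therefore be clobbered. Returns 0 if the block
// does not end in such an instruction or no suitable register is found.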
913 unsigned X86RegisterInfo::findDeadCallerSavedReg(
914     MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
915   const MachineFunction *MF = MBB.getParent();
916   if (MF->callsEHReturn())
917     return 0;
918 
919   const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);
920 
921   if (MBBI == MBB.end())
922     return 0;
923 
924   switch (MBBI->getOpcode()) {
925   default:
926     return 0;
927   case TargetOpcode::PATCHABLE_RET:
928   case X86::RET:
929   case X86::RET32:
930   case X86::RET64:
931   case X86::RETI32:
932   case X86::RETI64:
933   case X86::TCRETURNdi:
934   case X86::TCRETURNri:
935   case X86::TCRETURNmi:
936   case X86::TCRETURNdi64:
937   case X86::TCRETURNri64:
938   case X86::TCRETURNmi64:
939   case X86::EH_RETURN:
940   case X86::EH_RETURN64: {
941     SmallSet<uint16_t, 8> Uses;
942     for (MachineOperand &MO : MBBI->operands()) {
943       if (!MO.isReg() || MO.isDef())
944         continue;
945       Register Reg = MO.getReg();
946       if (!Reg)
947         continue;
948       for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
949         Uses.insert(*AI);
950     }
951 
952     for (auto CS : AvailableRegs)
953       if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
954         return CS;
955   }
956   }
957 
958   return 0;
959 }
960 
961 Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
962   const X86FrameLowering *TFI = getFrameLowering(MF);
963   return TFI->hasFP(MF) ? FramePtr : StackPtr;
964 }
965 
966 unsigned
967 X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
968   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
969   Register FrameReg = getFrameRegister(MF);
970   if (Subtarget.isTarget64BitILP32())
971     FrameReg = getX86SubSuperRegister(FrameReg, 32);
972   return FrameReg;
973 }
974 
975 unsigned
976 X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
977   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
978   Register StackReg = getStackRegister();
979   if (Subtarget.isTarget64BitILP32())
980     StackReg = getX86SubSuperRegister(StackReg, 32);
981   return StackReg;
982 }
983 
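// Return the AMX tile shape of VirtReg, either from the VirtRegMap cache or by
// inspecting its defining instruction (recursing through COPYs), and record
// the result in the VirtRegMap.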
984 static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
985                            const MachineRegisterInfo *MRI) {
986   if (VRM->hasShape(VirtReg))
987     return VRM->getShape(VirtReg);
988 
989   const MachineOperand &Def = *MRI->def_begin(VirtReg);
990   MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
991   unsigned OpCode = MI->getOpcode();
992   switch (OpCode) {
993   default:
994     llvm_unreachable("Unexpected machine instruction on tile register!");
995     break;
996   case X86::COPY: {
997     Register SrcReg = MI->getOperand(1).getReg();
998     ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
999     VRM->assignVirt2Shape(VirtReg, Shape);
1000     return Shape;
1001   }
1002   // We only collect the tile shape that is defined.
1003   case X86::PTILELOADDV:
1004   case X86::PTILELOADDT1V:
1005   case X86::PTDPBSSDV:
1006   case X86::PTDPBSUDV:
1007   case X86::PTDPBUSDV:
1008   case X86::PTDPBUUDV:
1009   case X86::PTILEZEROV:
1010   case X86::PTDPBF16PSV:
1011   case X86::PTDPFP16PSV:
1012   case X86::PTCMMIMFP16PSV:
1013   case X86::PTCMMRLFP16PSV:
1014     MachineOperand &MO1 = MI->getOperand(1);
1015     MachineOperand &MO2 = MI->getOperand(2);
1016     ShapeT Shape(&MO1, &MO2, MRI);
1017     VRM->assignVirt2Shape(VirtReg, Shape);
1018     return Shape;
1019   }
1020 }
1021 
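// For AMX tile registers, prefer physical registers whose currently assigned
// virtual register has the same tile shape as VirtReg; all other register
// classes fall back to the default hints.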
1022 bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
1023                                             ArrayRef<MCPhysReg> Order,
1024                                             SmallVectorImpl<MCPhysReg> &Hints,
1025                                             const MachineFunction &MF,
1026                                             const VirtRegMap *VRM,
1027                                             const LiveRegMatrix *Matrix) const {
1028   const MachineRegisterInfo *MRI = &MF.getRegInfo();
1029   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
1030   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
1031       VirtReg, Order, Hints, MF, VRM, Matrix);
1032 
1033   if (RC.getID() != X86::TILERegClassID)
1034     return BaseImplRetVal;
1035 
1036   ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
1037   auto AddHint = [&](MCPhysReg PhysReg) {
1038     Register VReg = Matrix->getOneVReg(PhysReg);
1039     if (VReg == MCRegister::NoRegister) { // Not allocated yet
1040       Hints.push_back(PhysReg);
1041       return;
1042     }
1043     ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
1044     if (PhysShape == VirtShape)
1045       Hints.push_back(PhysReg);
1046   };
1047 
1048   SmallSet<MCPhysReg, 4> CopyHints;
1049   CopyHints.insert(Hints.begin(), Hints.end());
1050   Hints.clear();
1051   for (auto Hint : CopyHints) {
1052     if (RC.contains(Hint) && !MRI->isReserved(Hint))
1053       AddHint(Hint);
1054   }
1055   for (MCPhysReg PhysReg : Order) {
1056     if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
1057         !MRI->isReserved(PhysReg))
1058       AddHint(PhysReg);
1059   }
1060 
1061 #define DEBUG_TYPE "tile-hint"
1062   LLVM_DEBUG({
1063     dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
1064     for (auto Hint : Hints) {
1065       dbgs() << "tmm" << Hint << ",";
1066     }
1067     dbgs() << "\n";
1068   });
1069 #undef DEBUG_TYPE
1070 
1071   return true;
1072 }
1073