//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit PIC mode
  // the GOT pointer must be in EBX before function calls through the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
  return getEncodingValue(i);
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;
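  // (In 32-bit mode the low-byte registers SIL/DIL/BPL/SPL do not exist, so
  // only the ABCD GPRs have an addressable sub_8bit register; narrowing to
  // the sub_8bit_hi constraint models that.)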

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
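  // Falling through to 'return RC' means no equal-size super-class was
  // acceptable; e.g. inflating FR32 to FR32X on a pre-AVX-512 target would
  // hand the allocator XMM16-XMM31, which such a target cannot encode.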
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}
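// The values below are rough per-class pressure limits rather than exact
// allocatable-register counts; when a frame pointer is in use, one fewer GPR
// is available, hence the FPDiff adjustment.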
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the no_caller_saved_registers attribute, switch to
  // the X86_INTR calling convention, since it has the matching CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the no_callee_saved_registers attribute is specified, override the
  // CSRs normally specified by the calling convention and use the empty set
  // instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}
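// The CSR_*_RegMask tables used below are produced by TableGen from the same
// CalleeSavedRegs definitions in X86CallingConv.td as the CSR_*_SaveList
// tables above, so the two switches must stay in agreement.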
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
          "Stack realignment in presence of dynamic allocas is not supported "
          "with this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }
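  // (For example, on x86-64 with both stack realignment and dynamic allocas,
  // RBX and all of its aliases (EBX, BX, BH, BL) become unavailable to the
  // register allocator from this point on.)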

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though
    // their super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}
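// isFixedRegister - the stack pointer is always fixed; the frame pointer is
// fixed only in functions that actually use one.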
bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer, if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference arguments, so a base pointer is
  // not needed.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
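// An X86 memory reference carries (Base, ScaleAmt, Index, Disp, Segment) in
// operands 1-5 of the LEA, so the rewrite below only fires when everything
// except the base register is trivial.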
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}
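// Rewrite an abstract frame-index operand into a concrete base register plus
// displacement once frame layout is known, e.g. a reference to %stack.0 may
// become -8(%rbp).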
bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand: the semantics are the same and the
  // destination is still 32 bits. This saves one byte per LEA since the 0x67
  // prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four-operand memory reference. Replace the
  // FrameIndex with the base register and add the offset to the existing
  // offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
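    // (For instance, a global-address or constant-pool displacement folded
    // into the memory operand; fold the frame offset into the operand's
    // offset field instead of an immediate.)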
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
          CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
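  // That is, AMX instructions whose row and column operands (operands 1 and
  // 2) define the shape of the tile they produce.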
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  if (RC.getID() != X86::TILERegClassID)
    return BaseImplRetVal;

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}