//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "AMDGPU.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden,
    cl::init(true));

std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
// meaning index 7 in SubRegFromChannelTable.
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};

namespace llvm {

// A temporary struct to spill SGPRs.
// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
// just v_writelane and v_readlane.
//
// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
// is saved to scratch (or the other way around for loads).
// For this, a VGPR is required where the needed lanes can be clobbered. The
// RegScavenger can provide a VGPR where currently active lanes can be
// clobbered, but we still need to save inactive lanes.
// The high-level steps are:
// - Try to scavenge SGPR(s) to save exec
// - Try to scavenge VGPR
// - Save needed, all or inactive lanes of a TmpVGPR
// - Spill/Restore SGPRs using TmpVGPR
// - Restore TmpVGPR
//
// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
// cannot scavenge temporary SGPRs to save exec, we use the following code:
//   buffer_store_dword TmpVGPR ; only if active lanes need to be saved
//   s_not exec, exec
//   buffer_store_dword TmpVGPR ; save inactive lanes
//   s_not exec, exec
struct SGPRSpillBuilder {
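  // Lane layout of a spill:
  // PerVGPR:   number of SGPR lanes that fit in one VGPR (the wave size).
  // NumVGPRs:  number of VGPRs needed to hold all spilled subregisters.
  // VGPRLanes: lane mask to activate in exec while moving SGPRs through a
  //            VGPR.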
  struct PerVGPRData {
    unsigned PerVGPR;
    unsigned NumVGPRs;
    int64_t VGPRLanes;
  };

  // The SGPR to save
  Register SuperReg;
  MachineBasicBlock::iterator MI;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  bool IsKill;
  const DebugLoc &DL;

  /* When spilling to stack */
  // The SGPRs are written into this VGPR, which is then written to scratch
  // (or vice versa for loads).
  Register TmpVGPR = AMDGPU::NoRegister;
  // Temporary spill slot to save TmpVGPR to.
  int TmpVGPRIndex = 0;
  // True if TmpVGPR may be live before the spill (i.e. it could not be
  // scavenged) and its used lanes therefore need to be saved and restored.
  bool TmpVGPRLive = false;
  // Scavenged SGPR to save EXEC.
  Register SavedExecReg = AMDGPU::NoRegister;
  // Stack index to write the SGPRs to.
  int Index;
  unsigned EltSize = 4;

  RegScavenger *RS;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  SIMachineFunctionInfo &MFI;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  bool IsWave32;
  Register ExecReg;
  unsigned MovOpc;
  unsigned NotOpc;

  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                   RegScavenger *RS)
      : SuperReg(MI->getOperand(0).getReg()), MI(MI),
        IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
        RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
        MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        IsWave32(IsWave32) {
    const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    if (IsWave32) {
      ExecReg = AMDGPU::EXEC_LO;
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      ExecReg = AMDGPU::EXEC;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
    assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
           SuperReg != AMDGPU::EXEC && "exec should never spill");
  }

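  // Compute how the spilled subregisters map onto VGPR lanes: how many lanes
  // one VGPR provides, how many VGPRs are required in total, and the lane
  // mask to use while copying.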
  PerVGPRData getPerVGPRData() {
    PerVGPRData Data;
    Data.PerVGPR = IsWave32 ? 32 : 64;
    Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
    Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
    return Data;
  }

  // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
  // free.
  // Writes these instructions if an SGPR can be scavenged:
  //   s_mov_b64 s[6:7], exec   ; Save exec
  //   s_mov_b64 exec, 3        ; Wanted lanemask
  //   buffer_store_dword v1    ; Write scavenged VGPR to emergency slot
  //
  // Writes these instructions if no SGPR can be scavenged:
  //   buffer_store_dword v0    ; Only if no free VGPR was found
  //   s_not_b64 exec, exec
  //   buffer_store_dword v0    ; Save inactive lanes
  //                            ; exec stays inverted, it is flipped back in
  //                            ; restore.
  void prepare() {
    // Scavenged temporary VGPR to use. It must be scavenged once for any number
    // of spilled subregs.
    // FIXME: The liveness analysis is limited and does not tell if a register
    // is in use in lanes that are currently inactive. We can never be sure if
    // a register is actually in use in another lane, so we need to save all
    // used lanes of the chosen VGPR.
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);

    // Reserve temporary stack slot
    TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
    if (TmpVGPR) {
      // Found a register that is dead in the currently active lanes, we only
      // need to spill inactive lanes.
      TmpVGPRLive = false;
    } else {
      // Pick v0 because it doesn't make a difference.
      TmpVGPR = AMDGPU::VGPR0;
      TmpVGPRLive = true;
    }

    // Try to scavenge SGPRs to save exec
    assert(!SavedExecReg && "Exec is already saved, refuse to save again");
    const TargetRegisterClass &RC =
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
    RS->setRegUsed(SuperReg);
    SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);

    int64_t VGPRLanes = getPerVGPRData().VGPRLanes;

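    // If exec could be saved, set it to exactly the needed lanes and save
    // TmpVGPR with a single store. Otherwise save the active lanes first,
    // then invert exec and save the inactive lanes; exec stays inverted
    // until restore() flips it back.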
    if (SavedExecReg) {
      RS->setRegUsed(SavedExecReg);
      // Set exec to needed lanes
      BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      // Spill needed lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
    } else {
      // Spill active lanes
      if (TmpVGPRLive)
        TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
                                    /*IsKill*/ false);
      // Spill inactive lanes
      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
    }
  }

  // Writes these instructions if an SGPR can be scavenged:
  //   buffer_load_dword v1     ; Reload scavenged VGPR from emergency slot
  //   s_waitcnt vmcnt(0)       ; If a free VGPR was found
  //   s_mov_b64 exec, s[6:7]   ; Restore exec
  //
  // Writes these instructions if no SGPR can be scavenged:
  //   buffer_load_dword v0     ; Restore inactive lanes
  //   s_waitcnt vmcnt(0)       ; If a free VGPR was found
  //   s_not_b64 exec, exec
  //   buffer_load_dword v0     ; Only if no free VGPR was found
  void restore() {
    if (SavedExecReg) {
      // Restore used lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                  /*IsKill*/ false);
      // Restore exec
      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
                   .addReg(SavedExecReg, RegState::Kill);
      // Add an implicit use of the load so it is not dead.
      // FIXME This inserts an unnecessary waitcnt
      if (!TmpVGPRLive) {
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      }
    } else {
      // Restore inactive lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                  /*IsKill*/ false);
      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive) {
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      }
      // Restore active lanes
      if (TmpVGPRLive)
        TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
    }
  }

  // Write TmpVGPR to memory or read TmpVGPR from memory.
  // Either using a single buffer_load/store if exec is set to the needed mask
  // or using
  //   buffer_load
  //   s_not exec, exec
  //   buffer_load
  //   s_not exec, exec
  void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
    if (SavedExecReg) {
      // Spill needed lanes
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
    } else {
      // Spill active lanes
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                  /*IsKill*/ false);
      // Spill inactive lanes
      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
    }
  }
};

} // namespace llvm

SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
      SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {

  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");

  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(
      *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
  for (auto Reg : AMDGPU::VGPR_HI16RegClass)
    RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));

  // HACK: Until this is fully tablegen'd.
  static llvm::once_flag InitializeRegSplitPartsFlag;

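  // RegSplitParts[N - 1][P] is the sub-register index of the N-channel part
  // at position P, i.e. the index covering 32-bit channels
  // [P * N, (P + 1) * N) of a register tuple.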
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      if (Size & 31)
        continue;
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      if (Pos % Size)
        continue;
      Pos /= Size;
      if (Vec.empty()) {
        unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
        Vec.resize(MaxNumParts);
      }
      Vec[Pos] = Idx;
    }
  };

  static llvm::once_flag InitializeSubRegFromChannelTableFlag;

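  // SubRegFromChannelTable[W][C] is the sub-register index that spans
  // channels [C, C + N), where N is the DWORD width that
  // SubRegFromChannelTableWidthMap maps to W + 1.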
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
      assert(Width < SubRegFromChannelTableWidthMap.size());
      Width = SubRegFromChannelTableWidthMap[Width];
      if (Width == 0)
        continue;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      assert(Offset < SubRegFromChannelTable[TableIdx].size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;
    }
  };

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
}

void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                           MCRegister Reg) const {
  MCRegAliasIterator R(Reg, this, true);

  for (; R.isValid(); ++R)
    Reserved.set(*R);
}

// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
  const MachineFunction *MF) const {
  CallingConv::ID CC = MF->getFunction().getCallingConv();
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
  case CallingConv::AMDGPU_Gfx:
    return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
               ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
               : CSR_AMDGPU_HighRegs_SaveList;
  default: {
    // Dummy to not crash RegisterClassInfo.
    static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
    return &NoCalleeSavedReg;
  }
  }
}

const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  return nullptr;
}

const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                     CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
  case CallingConv::AMDGPU_Gfx:
    return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
               ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
               : CSR_AMDGPU_HighRegs_RegMask;
  default:
    return nullptr;
  }
}

const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
  return CSR_AMDGPU_NoRegs_RegMask;
}

Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const SIFrameLowering *TFI =
      MF.getSubtarget<GCNSubtarget>().getFrameLowering();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // During ISel lowering we always reserve the stack pointer in entry
  // functions, but never actually want to reference it when accessing our own
  // frame. If we need a frame pointer we use it, but otherwise we can just use
  // an immediate "0" which we represent by returning NoRegister.
  if (FuncInfo->isEntryFunction()) {
    return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
  }
  return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
                        : FuncInfo->getStackPtrOffsetReg();
}

bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  // When we need stack realignment, we can't reference off of the
  // stack pointer, so we reserve a base pointer.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getNumFixedObjects() && shouldRealignStack(MF);
}

Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return CSR_AMDGPU_AllVGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return CSR_AMDGPU_AllAGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return CSR_AMDGPU_AllVectorRegs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}

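// Returns the sub-register index covering NumRegs 32-bit channels starting
// at channel Channel of a register tuple.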
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                              unsigned NumRegs) {
  assert(NumRegs < SubRegFromChannelTableWidthMap.size());
  unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}

MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::MODE);

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve src_vccz, src_execz, src_scc.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve lds_direct register - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  // Reserve null register - it shall never be allocated
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);

  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
  // will result in bugs.
  if (isWave32) {
    Reserved.set(AMDGPU::VCC);
    Reserved.set(AMDGPU::VCC_HI);
  }

  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  // TODO: In an entry function without calls and AGPRs used it is possible
  //       to use the whole register budget for VGPRs. Even more it shall
  //       be possible to estimate maximum AGPR/VGPR pressure and split
  //       register file accordingly.
  if (ST.hasGFX90AInsts())
    MaxNumVGPRs /= 2;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
    Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  for (auto Reg : AMDGPU::SReg_32RegClass) {
    Reserved.set(getSubReg(Reg, AMDGPU::hi16));
    Register Low = getSubReg(Reg, AMDGPU::lo16);
    // This is to prevent BB vcc liveness errors.
    if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
      Reserved.set(Low);
  }

  for (auto Reg : AMDGPU::AGPR_32RegClass) {
    Reserved.set(getSubReg(Reg, AMDGPU::hi16));
  }

  // Reserve all remaining AGPRs if there are no instructions to use them.
  if (!ST.hasMAIInsts()) {
    for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
      reserveRegisterTuples(Reserved, Reg);
    }
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  Register ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  }

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();

  if (StackPtrReg) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  MCRegister FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  if (hasBasePointer(MF)) {
    MCRegister BasePtrReg = getBaseRegister();
    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
  }

  for (auto Reg : MFI->WWMReservedRegs) {
    reserveRegisterTuples(Reserved, Reg.first);
  }

  // Reserve VGPRs used for SGPR spilling.
  // Note we treat freezeReservedRegs unusually because we run register
  // allocation in two phases. It's OK to re-freeze with new registers for the
  // second run.
#if 0
  for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
    for (auto &SpilledVGPR : SpilledFI.second)
      reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
  }
#endif

  // FIXME: Stop using reserved registers for this.
  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
    reserveRegisterTuples(Reserved, Reg);

  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
    reserveRegisterTuples(Reserved, Reg);

  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    reserveRegisterTuples(Reserved, SSpill.VGPR);

  return Reserved;
}

bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry, the base address is 0, so it can't possibly need any more
  // alignment.

  // FIXME: Should be able to specify the entry frame alignment per calling
  // convention instead.
  if (Info->isEntryFunction())
    return false;

  return TargetRegisterInfo::shouldRealignStack(MF);
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  // Do not use frame virtual registers. They used to be used for SGPRs, but
  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
  // spill.
  return false;
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.hasStackObjects();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return 0;

  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");

  return getScratchInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;

  int64_t FullOffset = Offset + getScratchInstrOffset(MI);

  if (SIInstrInfo::isMUBUF(*MI))
    return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);

  const SIInstrInfo *TII = ST.getInstrInfo();
  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                 SIInstrFlags::FlatScratch);
}

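// Insert instructions at the top of MBB that materialize FrameIdx + Offset
// into a new base register: an SGPR when flat scratch is enabled, a VGPR
// otherwise.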
Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                      int FrameIdx,
                                                      int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
                                           : AMDGPU::V_MOV_B32_e32;

  Register BaseReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
                             : &AMDGPU::VGPR_32RegClass);

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
        .addFrameIndex(FrameIdx);
    return BaseReg;
  }

  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  Register FIReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
                             : &AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
      .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
      .addFrameIndex(FrameIdx);

  if (ST.enableFlatScratch()) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
        .addReg(OffsetReg, RegState::Kill)
        .addReg(FIReg);
    return BaseReg;
  }

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(FIReg)
      .addImm(0); // clamp bit

  return BaseReg;
}

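// Rewrite MI's frame-index address operand (saddr for flat scratch, vaddr
// for MUBUF) to BaseReg and fold Offset into the immediate offset, which is
// asserted to stay legal.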
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool IsFlat = TII->isFLATScratch(MI);

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp =
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));

  if (IsFlat) {
    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                  SIInstrFlags::FlatScratch) &&
           "offset should be legal");
    FIOp->ChangeToRegister(BaseReg, false);
    OffsetOp->setImm(NewOffset);
    return;
  }

#ifndef NDEBUG
  MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
  assert(SOffset->isImm() && SOffset->getImm() == 0);
#endif

  assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
         "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;

  int64_t NewOffset = Offset + getScratchInstrOffset(MI);

  if (SIInstrInfo::isMUBUF(*MI))
    return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);

  const SIInstrInfo *TII = ST.getInstrInfo();
  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                SIInstrFlags::FlatScratch);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

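// Number of 32-bit components covered by a SI_SPILL save/restore pseudo,
// e.g. 4 for the 128-bit variants.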
static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
    return 7;
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
    return 6;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

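// Map an OFFEN (register offset) MUBUF store opcode to its OFFSET (immediate
// offset) form, or return -1 if there is none. getOffsetMUBUFLoad below does
// the same for loads.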
static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

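// Try to turn the spill/reload of frame index Index, lane Lane, into a copy
// between ValueReg and the AGPR (or VGPR) assigned to that lane, instead of
// a memory access. Returns a null MachineInstrBuilder if no register is
// assigned.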
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);

  if (Reg == AMDGPU::NoRegister)
    return MachineInstrBuilder();

  bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg))
                     ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                     : AMDGPU::V_ACCVGPR_READ_B32_e64;

  auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
                 .addReg(Src, getKillRegState(IsKill));
  MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
  return MIB;
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
    return true;

  MachineInstrBuilder NewMI =
      BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
          .addImm(Offset)
          .addImm(0) // cpol
          .addImm(0) // tfe
          .addImm(0) // swz
          .cloneMemRefs(*MI);

  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

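// Select the DWORD/DWORDX2/DWORDX3/DWORDX4 scratch load or store opcode for
// an access of EltSize bytes, switching to the ST (no address operand) form
// when the original opcode has neither a vaddr nor a saddr operand.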
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
                                          unsigned EltSize) {
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  bool UseST =
    AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
    AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;

  switch (EltSize) {
  case 4:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    break;
  case 8:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    break;
  case 12:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    break;
  case 16:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
    break;
  default:
    llvm_unreachable("Unexpected spill load/store size!");
  }

  if (UseST)
    LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);

  return LoadStoreOp;
}

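// Emit the memory accesses (and AGPR copies, where applicable) that spill or
// reload ValueReg at frame index Index, splitting wide registers into
// EltSize pieces and scavenging an SGPR for soffset when the immediate
// offset is not directly encodable.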
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
    MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
    RegScavenger *RS, LivePhysRegs *LiveRegs) const {
  assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");

  MachineFunction *MF = MBB.getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();

  const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
  const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);

  bool Scavenged = false;
  MCRegister SOffset = ScratchOffsetReg;

  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
  const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
  const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;

  // Always use 4 byte operations for AGPRs because we need to scavenge
  // a temporary VGPR.
  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;

  if (IsFlat && EltSize > 4) {
    LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
    Desc = &TII->get(LoadStoreOp);
  }

  Align Alignment = MFI.getObjectAlign(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  assert((IsFlat || ((Offset % EltSize) == 0)) &&
         "unexpected VGPR spill offset");

  bool IsOffsetLegal =
      IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                      SIInstrFlags::FlatScratch)
             : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
  if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
    SOffset = MCRegister();

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with
    // WavefrontSize.
    if (!IsFlat)
      Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs(), so use LiveRegs in this
    // case.
    if (RS) {
      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
    } else if (LiveRegs) {
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
        if (LiveRegs->available(MF->getRegInfo(), Reg)) {
          SOffset = Reg;
          break;
        }
      }
    }

    if (!SOffset) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it remains true until we implement spilling using
      // scalar stores), we have no way to free up an SGPR. Our solution here
      // is to add the offset directly to the ScratchOffset or StackPtrOffset
      // register, and then subtract the offset after the spill to return the
      // register to its original value.
      if (!ScratchOffsetReg)
        ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    if (!SOffset)
      report_fatal_error("could not scavenge SGPR to spill in entry function");

    if (ScratchOffsetReg == AMDGPU::NoRegister) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
    } else {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
          .addReg(ScratchOffsetReg)
          .addImm(Offset);
    }

    Offset = 0;
  }

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
    assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
           "Unexpected vaddr for flat scratch with a FI operand");

    assert(ST.hasFlatScratchSTMode());
    LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
    Desc = &TII->get(LoadStoreOp);
  }

  Register TmpReg;

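  // Emit one load/store (or AGPR copy) per EltSize piece of the value; the
  // final iteration handles the smaller remainder, if any.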
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      EltSize = RemSize;
      LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
    }
    Desc = &TII->get(LoadStoreOp);

    unsigned NumRegs = EltSize / 4;
    Register SubReg = e == 1
        ? ValueReg
        : Register(getSubReg(ValueReg,
                             getSubRegFromChannel(RegOffset / 4, NumRegs)));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    // Make sure the whole register is defined if there are undef components by
    // adding an implicit def of the super-reg on the first instruction.
    bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
    bool NeedSuperRegImpOperand = e > 1;

    unsigned Lane = RegOffset / 4;
    unsigned LaneE = (RegOffset + EltSize) / 4;
    for ( ; Lane != LaneE; ++Lane) {
      bool IsSubReg = e > 1 || EltSize > 4;
      Register Sub = IsSubReg
          ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
          : ValueReg;
      auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
      if (!MIB.getInstr())
        break;
      if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
        MIB.addReg(ValueReg, RegState::ImplicitDefine);
        NeedSuperRegDef = false;
      }
      if (IsSubReg || NeedSuperRegImpOperand) {
        NeedSuperRegImpOperand = true;
        unsigned State = SrcDstRegState;
        if (Lane + 1 != LaneE)
          State &= ~RegState::Kill;
        MIB.addReg(ValueReg, RegState::Implicit | State);
      }
    }

    if (Lane == LaneE) // Fully spilled into AGPRs.
      continue;

    // Offset in bytes from the beginning of the ValueReg to its portion we
    // still need to spill. It may differ from RegOffset if a portion of
    // current SubReg has been already spilled into AGPRs by the loop above.
    unsigned RemRegOffset = Lane * 4;
    unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
    if (RemEltSize != EltSize) { // Partially spilled to AGPRs
      assert(IsFlat && EltSize > 4);

      unsigned NumRegs = RemEltSize / 4;
      SubReg = Register(getSubReg(ValueReg,
                        getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
      unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
      Desc = &TII->get(Opc);
    }

    unsigned FinalReg = SubReg;

    if (IsAGPR) {
      assert(EltSize == 4);

      if (!TmpReg) {
        assert(RS && "Needs to have RegScavenger to spill an AGPR!");
        // FIXME: change to scavengeRegisterBackwards()
        TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
        RS->setRegUsed(TmpReg);
      }
      if (IsStore) {
        auto AccRead = BuildMI(MBB, MI, DL,
                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
                           .addReg(SubReg, getKillRegState(IsKill));
        if (NeedSuperRegDef)
          AccRead.addReg(ValueReg, RegState::ImplicitDefine);
        AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
      }
      SubReg = TmpReg;
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
    MachineMemOperand *NewMMO =
        MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
                                 commonAlignment(Alignment, RemRegOffset));

    auto MIB =
        BuildMI(MBB, MI, DL, *Desc)
            .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
    if (!IsFlat)
      MIB.addReg(FuncInfo->getScratchRSrcReg());

    if (SOffset == AMDGPU::NoRegister) {
      if (!IsFlat)
        MIB.addImm(0);
    } else {
      MIB.addReg(SOffset, SOffsetRegState);
    }
    MIB.addImm(Offset + RemRegOffset)
        .addImm(0); // cpol
    if (!IsFlat)
      MIB.addImm(0)  // tfe
          .addImm(0); // swz
    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)
      MIB.addReg(ValueReg, RegState::ImplicitDefine);

    if (!IsStore && TmpReg != AMDGPU::NoRegister) {
      MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
                    FinalReg)
                .addReg(TmpReg, RegState::Kill);
      MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
    }

    if (NeedSuperRegImpOperand)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
        .addReg(SOffset)
        .addImm(-ScratchOffsetRegDelta);
  }
}

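// Load or store SB.TmpVGPR at stack slot Index (element Offset) on behalf of
// an SGPR spill, using a flat scratch or MUBUF DWORD access depending on the
// subtarget.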
void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
                                             int Offset, bool IsLoad,
                                             bool IsKill) const {
  // Load/store VGPR
  MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
  assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);

  Register FrameReg =
      FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
          ? getBaseRegister()
          : getFrameRegister(SB.MF);

  Align Alignment = FrameInfo.getObjectAlign(Index);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
  MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
      PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
      SB.EltSize, Alignment);

  if (IsLoad) {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                        Offset * SB.EltSize, MMO, SB.RS);
  } else {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
                        Offset * SB.EltSize, MMO, SB.RS);
    // This only ever adds one VGPR spill
    SB.MFI.addToSpilledVGPRs(1);
  }
}

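// Spill the SGPR (or SGPR tuple) defined by MI's first operand, either into
// VGPR lanes via v_writelane or, failing that, to memory through a temporary
// VGPR. Returns false only if OnlyToVGPR is set and no VGPR lanes are
// assigned to this stack slot.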
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               LiveIntervals *LIS,
                               bool OnlyToVGPR) const {
  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
      SB.MFI.getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
                         SB.SuperReg != SB.MFI.getFrameOffsetReg()));

  if (SpillToVGPR) {
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      Register SubReg =
          SB.NumSubRegs == 1
              ? SB.SuperReg
              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;

      // Mark the "old value of vgpr" input undef only if this is the first sgpr
      // spill to this specific vgpr in the first basic block.
      auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                         Spill.VGPR)
                     .addReg(SubReg, getKillRegState(UseKill))
                     .addImm(Spill.Lane)
                     .addReg(Spill.VGPR);
      if (LIS) {
        if (i == 0)
          LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
        else
          LIS->InsertMachineInstrInMaps(*MIB);
      }

      if (i == 0 && SB.NumSubRegs > 1) {
        // We may be spilling a super-register which is only partially defined,
        // and need to ensure later spills think the value is defined.
        MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
      }

      if (SB.NumSubRegs > 1)
        MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    }
  } else {
    SB.prepare();

    // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
    unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);

    // Per VGPR helper data
    auto PVD = SB.getPerVGPRData();

    for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
      unsigned TmpVGPRFlags = RegState::Undef;

      // Write sub registers into the VGPR
      for (unsigned i = Offset * PVD.PerVGPR,
                    e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
           i < e; ++i) {
        Register SubReg =
            SB.NumSubRegs == 1
                ? SB.SuperReg
                : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

        MachineInstrBuilder WriteLane =
            BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                    SB.TmpVGPR)
                .addReg(SubReg, SubKillState)
                .addImm(i % PVD.PerVGPR)
                .addReg(SB.TmpVGPR, TmpVGPRFlags);
        TmpVGPRFlags = 0;

        if (LIS) {
          if (i == 0)
            LIS->ReplaceMachineInstrInMaps(*MI, *WriteLane);
          else
            LIS->InsertMachineInstrInMaps(*WriteLane);
        }

        // There could be undef components of a spilled super register.
        // TODO: Can we detect this and skip the spill?
        if (SB.NumSubRegs > 1) {
          // The last implicit use of the SB.SuperReg carries the "Kill" flag.
          unsigned SuperKillState = 0;
          if (i + 1 == SB.NumSubRegs)
            SuperKillState |= getKillRegState(SB.IsKill);
          WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
        }
      }

      // Write out VGPR
      SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
    }

    SB.restore();
  }

  MI->eraseFromParent();
  SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);

  if (LIS)
    LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);

  return true;
}

bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS,
                                 LiveIntervals *LIS,
                                 bool OnlyToVGPR) const {
  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
      SB.MFI.getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  if (SpillToVGPR) {
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      Register SubReg =
          SB.NumSubRegs == 1
              ? SB.SuperReg
              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
      auto MIB =
          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
              .addReg(Spill.VGPR)
              .addImm(Spill.Lane);
      if (SB.NumSubRegs > 1 && i == 0)
        MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
      if (LIS) {
        if (i == e - 1)
          LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
        else
          LIS->InsertMachineInstrInMaps(*MIB);
      }
    }
  } else {
    SB.prepare();

    // Per VGPR helper data
    auto PVD = SB.getPerVGPRData();

    for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
      // Load in VGPR data
      SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);

      // Unpack lanes
      for (unsigned i = Offset * PVD.PerVGPR,
                    e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
           i < e; ++i) {
        Register SubReg =
            SB.NumSubRegs == 1
                ? SB.SuperReg
                : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

        bool LastSubReg = (i + 1 == e);
        auto MIB = BuildMI(SB.MBB, MI, SB.DL,
                           SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
                       .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                       .addImm(i);
        if (SB.NumSubRegs > 1 && i == 0)
          MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
        if (LIS) {
          if (i == e - 1)
            LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
          else
            LIS->InsertMachineInstrInMaps(*MIB);
        }
      }
    }

    SB.restore();
  }

  MI->eraseFromParent();

  if (LIS)
    LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);

  return true;
}

/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS,
  LiveIntervals *LIS) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, LIS, true);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, LIS, true);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}

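// Frame-index elimination entry point, called from PEI: lowers the SI_SPILL
// pseudos into real loads/stores and rewrites any remaining frame-index
// operand into a register and/or immediate offset.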
                          ? getBaseRegister()
                          : getFrameRegister(*MF);

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    spillSGPR(MI, Index, RS);
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    restoreSGPR(MI, Index, RS);
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());

    unsigned Opc = ST.enableFlatScratch()
                       ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                       : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    buildSpillLoadStore(
        *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
           MFI->getStackPtrOffsetReg());

    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    buildSpillLoadStore(
        *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    // Other access to frame index
    const DebugLoc &DL = MI->getDebugLoc();

    int64_t Offset = FrameInfo.getObjectOffset(Index);
    if (ST.enableFlatScratch()) {
      if (TII->isFLATScratch(*MI)) {
        assert((int16_t)FIOperandNum ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::saddr));

        // The offset is always swizzled, just replace it.
        if (FrameReg)
          FIOp.ChangeToRegister(FrameReg, false);

        if (!Offset)
          return;

        MachineOperand *OffsetOp =
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
        int64_t NewOffset = Offset + OffsetOp->getImm();
        if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                   SIInstrFlags::FlatScratch)) {
          OffsetOp->setImm(NewOffset);
          if (FrameReg)
            return;
          Offset = 0;
        }

        assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
               "Unexpected vaddr for flat scratch with a FI operand");

        // On GFX10 we have ST mode to use no registers for an address.
        // Otherwise we need to materialize 0 into an SGPR.
        if (!Offset && ST.hasFlatScratchSTMode()) {
          unsigned Opc = MI->getOpcode();
          unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
          MI->RemoveOperand(
              AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
          MI->setDesc(TII->get(NewOpc));
          return;
        }
      }

      if (!FrameReg) {
        FIOp.ChangeToImmediate(Offset);
        if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
          return;
      }

      // We need to use a register here. Check if we can use an SGPR or need
      // a VGPR.
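      // The write below is a probe, not a real use of m0: the operand is
      // temporarily rewritten to an arbitrary SGPR (m0) purely so that
      // isOperandLegal can report whether this operand slot accepts SGPRs at
      // all; the operand is then overwritten with a properly scavenged
      // register of the chosen kind.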
      FIOp.ChangeToRegister(AMDGPU::M0, false);
      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);

      if (!Offset && FrameReg && UseSGPR) {
        FIOp.setReg(FrameReg);
        return;
      }

      const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
                                              : &AMDGPU::VGPR_32RegClass;

      Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
      FIOp.setReg(TmpReg);
      FIOp.setIsKill(true);

      if ((!FrameReg || !Offset) && TmpReg) {
        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
        auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
        if (FrameReg)
          MIB.addReg(FrameReg);
        else
          MIB.addImm(Offset);

        return;
      }

      Register TmpSReg =
          UseSGPR ? TmpReg
                  : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
                                         !UseSGPR);

      // TODO: for flat scratch another attempt can be made with a VGPR index
      //       if no SGPRs can be scavenged.
      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
        report_fatal_error("Cannot scavenge register in FI elimination!");

      if (!TmpSReg) {
        // Use frame register and restore it after.
        TmpSReg = FrameReg;
        FIOp.setReg(FrameReg);
        FIOp.setIsKill(false);
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
          .addReg(FrameReg)
          .addImm(Offset);

      if (!UseSGPR)
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
            .addReg(TmpSReg, RegState::Kill);

      if (TmpSReg == FrameReg) {
        // Undo frame register modification.
        BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
                FrameReg)
            .addReg(FrameReg)
            .addImm(-Offset);
      }

      return;
    }

    bool IsMUBUF = TII->isMUBUF(*MI);

    if (!IsMUBUF && !MFI->isEntryFunction()) {
      // Convert to a swizzled stack address by scaling by the wave size.
      //
      // In an entry function/kernel the offset is already swizzled.

      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
      Register ResultReg =
          IsCopy ? MI->getOperand(0).getReg()
                 : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (Offset == 0) {
        // XXX - This never happens because of emergency scavenging slot at 0?
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
            .addImm(ST.getWavefrontSizeLog2())
            .addReg(FrameReg);
      } else {
        if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
          // Reuse ResultReg in intermediate step.
          Register ScaledReg = ResultReg;

          BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
                  ScaledReg)
              .addImm(ST.getWavefrontSizeLog2())
              .addReg(FrameReg);

          const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;

          // TODO: Fold if use instruction is another add of a constant.
          if (IsVOP2 ||
              AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
            // FIXME: This can fail
            MIB.addImm(Offset);
            MIB.addReg(ScaledReg, RegState::Kill);
            if (!IsVOP2)
              MIB.addImm(0); // clamp bit
          } else {
            assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
                   "Need to reuse carry out register");

            // Use scavenged unused carry out as offset register.
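            // In wave64, the carry out of V_ADD_CO_U32_e64 lives in a 64-bit
            // SGPR pair; only its low half is needed to hold the materialized
            // offset, hence the sub0 extraction below. In wave32 the carry
            // out is already a single 32-bit SGPR.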
            Register ConstOffsetReg;
            if (!isWave32)
              ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
            else
              ConstOffsetReg = MIB.getReg(1);

            BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32),
                    ConstOffsetReg)
                .addImm(Offset);
            MIB.addReg(ConstOffsetReg, RegState::Kill);
            MIB.addReg(ScaledReg, RegState::Kill);
            MIB.addImm(0); // clamp bit
          }
        } else {
          // We have to produce a carry out, and there isn't a free SGPR pair
          // for it. We can keep the whole computation on the SALU to avoid
          // clobbering an additional register at the cost of an extra mov.

          // We may have 1 free scratch SGPR even though a carry out is
          // unavailable. Only one additional mov is needed.
          Register TmpScaledReg =
              RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
              .addReg(FrameReg)
              .addImm(ST.getWavefrontSizeLog2());
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
              .addReg(ScaledReg, RegState::Kill)
              .addImm(Offset);
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
              .addReg(ScaledReg, RegState::Kill);

          // If there were truly no free SGPRs, we need to undo everything.
          if (!TmpScaledReg.isValid()) {
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
                .addReg(ScaledReg, RegState::Kill)
                .addImm(-Offset);
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
                .addReg(FrameReg)
                .addImm(ST.getWavefrontSizeLog2());
          }
        }
      }

      // Don't introduce an extra copy if we're just materializing in a mov.
      if (IsCopy)
        MI->eraseFromParent();
      else
        FIOp.ChangeToRegister(ResultReg, false, false, true);
      return;
    }

    if (IsMUBUF) {
      // Disable offen so we don't need a 0 vgpr base.
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));

      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert((SOffset.isImm() && SOffset.getImm() == 0));

      if (FrameReg != AMDGPU::NoRegister)
        SOffset.ChangeToRegister(FrameReg, false);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm =
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

      if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
          buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
        MI->eraseFromParent();
        return;
      }
    }

    // If the offset is simply too big, don't convert to a scratch wave offset
    // relative index.
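    // Instead, use the frame object's offset directly: try it as an inline
    // immediate first, and if the instruction cannot encode it, materialize
    // it into a scavenged VGPR with a V_MOV.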
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
      Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
  return AMDGPUInstPrinter::getRegisterName(Reg);
}

static const TargetRegisterClass *
getAnyVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024RegClass;

  return nullptr;
}

static const TargetRegisterClass *
getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024_Align2RegClass;

  return nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
  if (BitWidth == 1)
    return &AMDGPU::VReg_1RegClass;
  if (BitWidth <= 16)
    return &AMDGPU::VGPR_LO16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::VGPR_32RegClass;
  return ST.needsAlignedVGPRs()
             ? getAlignedVGPRClassForBitWidth(BitWidth)
             : getAnyVGPRClassForBitWidth(BitWidth);
}

static const TargetRegisterClass *
getAnyAGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AReg_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AReg_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AReg_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AReg_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AReg_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AReg_256RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AReg_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AReg_1024RegClass;

  return nullptr;
}

static const TargetRegisterClass *
getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::AReg_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::AReg_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::AReg_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::AReg_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::AReg_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::AReg_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::AReg_256_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::AReg_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::AReg_1024_Align2RegClass;

  return nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
  if (BitWidth <= 16)
    return &AMDGPU::AGPR_LO16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::AGPR_32RegClass;
  return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
                                : getAnyAGPRClassForBitWidth(BitWidth);
}

const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 16)
    return &AMDGPU::SGPR_LO16RegClass;
  if (BitWidth <= 32)
    return &AMDGPU::SReg_32RegClass;
  if (BitWidth <= 64)
    return &AMDGPU::SReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::SGPR_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::SGPR_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::SGPR_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::SGPR_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::SGPR_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::SGPR_256RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::SGPR_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::SGPR_1024RegClass;

  return nullptr;
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
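// The table below appears to be ordered largely from the narrowest classes to
// the widest, so the linear scan tends to return the smallest register class
// that contains the register.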
const TargetRegisterClass *
SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_LO16RegClass,
    &AMDGPU::VGPR_HI16RegClass,
    &AMDGPU::SReg_LO16RegClass,
    &AMDGPU::AGPR_LO16RegClass,
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::AGPR_32RegClass,
    &AMDGPU::VReg_64_Align2RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::AReg_64_Align2RegClass,
    &AMDGPU::AReg_64RegClass,
    &AMDGPU::VReg_96_Align2RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::SReg_96RegClass,
    &AMDGPU::AReg_96_Align2RegClass,
    &AMDGPU::AReg_96RegClass,
    &AMDGPU::VReg_128_Align2RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::AReg_128_Align2RegClass,
    &AMDGPU::AReg_128RegClass,
    &AMDGPU::VReg_160_Align2RegClass,
    &AMDGPU::VReg_160RegClass,
    &AMDGPU::SReg_160RegClass,
    &AMDGPU::AReg_160_Align2RegClass,
    &AMDGPU::AReg_160RegClass,
    &AMDGPU::VReg_192_Align2RegClass,
    &AMDGPU::VReg_192RegClass,
    &AMDGPU::SReg_192RegClass,
    &AMDGPU::AReg_192_Align2RegClass,
    &AMDGPU::AReg_192RegClass,
    &AMDGPU::VReg_224_Align2RegClass,
    &AMDGPU::VReg_224RegClass,
    &AMDGPU::SReg_224RegClass,
    &AMDGPU::AReg_224_Align2RegClass,
    &AMDGPU::AReg_224RegClass,
    &AMDGPU::VReg_256_Align2RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::AReg_256_Align2RegClass,
    &AMDGPU::AReg_256RegClass,
    &AMDGPU::VReg_512_Align2RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::AReg_512_Align2RegClass,
    &AMDGPU::AReg_512RegClass,
    &AMDGPU::SReg_1024RegClass,
    &AMDGPU::VReg_1024_Align2RegClass,
    &AMDGPU::VReg_1024RegClass,
    &AMDGPU::AReg_1024_Align2RegClass,
    &AMDGPU::AReg_1024RegClass,
    &AMDGPU::SCC_CLASSRegClass,
    &AMDGPU::Pseudo_SReg_32RegClass,
    &AMDGPU::Pseudo_SReg_128RegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
                               Register Reg) const {
  const TargetRegisterClass *RC;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
  else
    RC = getPhysRegClass(Reg);
  return isSGPRClass(RC);
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
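// In the meantime, hasVGPRs and hasAGPRs below answer the question
// structurally: map the class's bit width back to the canonical VGPR/AGPR
// class of that width and test for a common subclass. For example,
// hasVGPRs(&AMDGPU::VReg_64RegClass) maps 64 bits back to a 64-bit VGPR class
// and finds a common subclass, so it returns true. 16-bit widths are
// special-cased because the LO16 and HI16 halves are distinct classes of the
// same width.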
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  unsigned Size = getRegSizeInBits(*RC);
  if (Size == 16) {
    return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
           getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
  }
  const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
  if (!VRC) {
    assert(Size < 32 && "Invalid register class size");
    return false;
  }
  return getCommonSubClass(VRC, RC) != nullptr;
}

bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
  unsigned Size = getRegSizeInBits(*RC);
  if (Size < 16)
    return false;
  const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
  if (!ARC) {
    assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
    return false;
  }
  return getCommonSubClass(ARC, RC) != nullptr;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
  unsigned Size = getRegSizeInBits(*SRC);
  const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
  assert(VRC && "Invalid register class size");
  return VRC;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
  unsigned Size = getRegSizeInBits(*SRC);
  const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
  assert(ARC && "Invalid register class size");
  return ARC;
}

const TargetRegisterClass *
SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
  unsigned Size = getRegSizeInBits(*VRC);
  if (Size == 32)
    return &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
  assert(SRC && "Invalid register class size");
  return SRC;
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
  if (isSGPRClass(RC)) {
    if (Size == 32)
      RC = &AMDGPU::SGPR_32RegClass;
    else
      RC = getSGPRClassForBitWidth(Size);
  } else if (hasAGPRs(RC)) {
    RC = getAGPRClassForBitWidth(Size);
  } else {
    RC = getVGPRClassForBitWidth(Size);
  }
  assert(RC && "Invalid sub-register class size");
  return RC;
}

const TargetRegisterClass *
SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
                                         const TargetRegisterClass *SubRC,
                                         unsigned SubIdx) const {
  // Ensure this subregister index is aligned in the super register.
  const TargetRegisterClass *MatchRC =
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC)
             ? MatchRC
             : nullptr;
}

bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
  if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
      OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
    return !ST.hasMFMAInlineLiteralBug();

  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool SIRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                          unsigned DefSubReg,
                                          const TargetRegisterClass *SrcRC,
                                          unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister extract.
  // Operations mostly don't care about the super register class, so we only
  // want to stop on the most basic of copies between the same register class.
  //
  // e.g. if we have something like
  // %0 = ...
  // %1 = ...
  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
  // %3 = COPY %2, sub0
  //
  // We want to look through the COPY to find:
  //  => %3 = COPY %0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
  // TODO: 64-bit operands have extending behavior from 32-bit literals.
  return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
}

/// Returns the lowest register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return the
/// highest unused register.
MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                              const TargetRegisterClass *RC,
                                              const MachineFunction &MF,
                                              bool ReserveHighestVGPR) const {
  if (ReserveHighestVGPR) {
    for (MCRegister Reg : reverse(*RC))
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  } else {
    for (MCRegister Reg : *RC)
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  }
  return MCRegister();
}

ArrayRef<int16_t>
SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                 unsigned EltSize) const {
  const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024);

  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;
  assert(RegSplitParts.size() + 1 >= EltDWORDs);

  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  const unsigned NumParts = RegDWORDs / EltDWORDs;

  return makeArrayRef(Parts.data(), NumParts);
}

const TargetRegisterClass *
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  Register Reg) const {
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            Register Reg) const {
  const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
  // Registers without classes are unaddressable, SGPR-like registers.
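  // getPhysRegClass returns null for those, hence the null check below.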
  return RC && hasVGPRs(RC);
}

bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
                            Register Reg) const {
  const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);

  // Registers without classes are unaddressable, SGPR-like registers.
  return RC && hasAGPRs(RC);
}

bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                    const TargetRegisterClass *SrcRC,
                                    unsigned SubReg,
                                    const TargetRegisterClass *DstRC,
                                    unsigned DstSubReg,
                                    const TargetRegisterClass *NewRC,
                                    LiveIntervals &LIS) const {
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);

  // Do not increase the size of registers beyond a dword; we would need to
  // allocate adjacent registers and constrain regalloc more than needed.

  // Always allow dword coalescing.
  if (SrcSize <= 32 || DstSize <= 32)
    return true;

  return NewSize <= DstSize || NewSize <= SrcSize;
}

unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                             MachineFunction &MF) const {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
                                                       MF.getFunction());
  switch (RC->getID()) {
  default:
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
    return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
    return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
  }
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)
    return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
    return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  llvm_unreachable("Unexpected register pressure set!");
}

const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
  static const int Empty[] = { -1 };

  if (RegPressureIgnoredUnits[RegUnit])
    return Empty;

  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
}

MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
  // Not a callee saved register.
  return AMDGPU::SGPR30_SGPR31;
}

const TargetRegisterClass *
SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
                                         const RegisterBank &RB,
                                         const MachineRegisterInfo &MRI) const {
  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
    return getVGPRClassForBitWidth(std::max(32u, Size));
  case AMDGPU::VCCRegBankID:
    assert(Size == 1);
    return isWave32
               ? &AMDGPU::SReg_32_XM0_XEXECRegClass
               : &AMDGPU::SReg_64_XEXECRegClass;
  case AMDGPU::SGPRRegBankID:
    return getSGPRClassForBitWidth(std::max(32u, Size));
  case AMDGPU::AGPRRegBankID:
    return getAGPRClassForBitWidth(std::max(32u, Size));
  default:
    llvm_unreachable("unknown register bank");
  }
}

const TargetRegisterClass *
SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
                                                 const MachineRegisterInfo &MRI) const {
  const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
  if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank *>())
    return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);

  const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass *>();
  return getAllocatableClass(RC);
}

MCRegister SIRegisterInfo::getVCC() const {
  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
}

const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
  // VGPR tuples have an alignment requirement on gfx90a variants.
  return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
                                : &AMDGPU::VReg_64RegClass;
}

const TargetRegisterClass *
SIRegisterInfo::getRegClass(unsigned RCID) const {
  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
    return getBoolRC();
  case AMDGPU::SReg_1_XEXECRegClassID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case -1:
    return nullptr;
  default:
    return AMDGPUGenRegisterInfo::getRegClass(RCID);
  }
}

// Find reaching register definition
MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
                                              MachineInstr &Use,
                                              MachineRegisterInfo &MRI,
                                              LiveIntervals *LIS) const {
  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
  SlotIndex DefIdx;

  if (Reg.isVirtual()) {
    if (!LIS->hasInterval(Reg))
      return nullptr;
    LiveInterval &LI = LIS->getInterval(Reg);
    LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
                                  : MRI.getMaxLaneMaskForVReg(Reg);
    VNInfo *V = nullptr;
    if (LI.hasSubRanges()) {
      for (auto &S : LI.subranges()) {
        if ((S.LaneMask & SubLanes) == SubLanes) {
          V = S.getVNInfoAt(UseIdx);
          break;
        }
      }
    } else {
      V = LI.getVNInfoAt(UseIdx);
    }
    if (!V)
      return nullptr;
    DefIdx = V->def;
  } else {
    // Find last def.
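    // For a physical register, every register unit must have a value live at
    // the use; keep the latest def across all units (the one dominated by the
    // others), and give up if any unit has no reaching value.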
    for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
         ++Units) {
      LiveRange &LR = LIS->getRegUnit(*Units);
      if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
        if (!DefIdx.isValid() ||
            MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
                          LIS->getInstructionFromIndex(V->def)))
          DefIdx = V->def;
      } else {
        return nullptr;
      }
    }
  }

  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);

  if (!Def || !MDT.dominates(Def, &Use))
    return nullptr;

  assert(Def->modifiesRegister(Reg, this));

  return Def;
}

MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
  assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);

  for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
                                         AMDGPU::SReg_32RegClass,
                                         AMDGPU::AGPR_32RegClass } ) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
      return Super;
  }
  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {
    return Super;
  }

  return AMDGPU::NoRegister;
}

bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
  if (!ST.needsAlignedVGPRs())
    return true;

  if (hasVGPRs(&RC))
    return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
  if (hasAGPRs(&RC))
    return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));

  return true;
}

bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
  switch (PhysReg) {
  case AMDGPU::SGPR_NULL:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_LIMIT:
    return true;
  default:
    return false;
  }
}

ArrayRef<MCPhysReg>
SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

ArrayRef<MCPhysReg>
SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
  return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 2);
}

ArrayRef<MCPhysReg>
SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
}