//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
    : AMDGPURegisterInfo(st) {}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::EXEC);

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers,
  // but this seems likely to result in bugs, so mark them as reserved.
  Reserved.set(AMDGPU::EXEC_LO);
  Reserved.set(AMDGPU::EXEC_HI);

  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
  Reserved.set(AMDGPU::FLAT_SCR);
  Reserved.set(AMDGPU::FLAT_SCR_LO);
  Reserved.set(AMDGPU::FLAT_SCR_HI);

  // Reserve some VGPRs to use as temp registers in case we have to spill
  // VGPRs.
  Reserved.set(AMDGPU::VGPR255);
  Reserved.set(AMDGPU::VGPR254);

  // Tonga and Iceland can only allocate a fixed number of SGPRs due
  // to a hw bug.
  if (ST.hasSGPRInitBug()) {
    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
    // Assume XNACK_MASK is unused.
    unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;

    for (unsigned i = Limit; i < NumSGPRs; ++i) {
      unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);

      for (; R.isValid(); ++R)
        Reserved.set(*R);
    }
  }

  return Reserved;
}

unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {

  // FIXME: We should adjust the max number of waves based on LDS size.
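  // For example, at the 10 waves/CU maximum on SI, getNumSGPRsAllowed below
  // returns 48, so a pressure set fed by a 64-bit SGPR class (two 32-bit
  // sub-registers per register) is capped at 48 / 2 = 24 registers.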
  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
                                          ST.getMaxWavesPerCU());
  unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());

  for (regclass_iterator I = regclass_begin(), E = regclass_end();
       I != E; ++I) {

    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
    unsigned Limit;

    if (isSGPRClass(*I)) {
      Limit = SGPRLimit / NumSubRegs;
    } else {
      Limit = VGPRLimit / NumSubRegs;
    }

    const int *Sets = getRegClassPressureSets(*I);
    assert(Sets);
    for (unsigned i = 0; Sets[i] != -1; ++i) {
      if (Sets[i] == (int)Idx)
        return Limit;
    }
  }
  return 256;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  return Fn.getFrameInfo()->hasStackObjects();
}

// Returns the number of 32-bit sub-registers saved or restored by the given
// spill pseudo-instruction.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

// Emits one scratch buffer load or store per 32-bit sub-register of Value.
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MI->getParent()->getParent();
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  unsigned SOffset = ScratchOffset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  // The buffer instruction's immediate offset field is only 12 bits wide. If
  // the largest offset this spill will use does not fit, fold the frame
  // offset into a scavenged SGPR and use an immediate offset of 0 instead.
  if (!isUInt<12>(Offset + Size)) {
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) : Value;
    bool IsKill = (i == e - 1);

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
            .addReg(SubReg, getDefRegState(IsLoad))
            .addReg(ScratchRsrcReg, getKillRegState(IsKill))
            .addImm(Offset)
            .addReg(SOffset)
            .addImm(0) // glc
            .addImm(0) // slc
            .addImm(0) // tfe
            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
              .addReg(SubReg)
              .addImm(Spill.Lane);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      bool isM0 = SubReg == AMDGPU::M0;
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      // V_READLANE_B32 cannot write M0 directly, so read into a scavenged
      // SGPR and copy it to M0 afterwards.
      if (isM0)
        SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
              .addReg(Spill.VGPR)
              .addImm(Spill.Lane)
              .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      if (isM0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
                .addReg(SubReg);
      }
    }

    // TODO: only do this when it is needed
    switch (ST.getGeneration()) {
    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
      TII->insertNOPs(MI, 3);
      break;
    case AMDGPUSubtarget::SEA_ISLANDS:
      break;
    default: // VOLCANIC_ISLANDS and later
      // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
      // and later. This also applies to VALUs which write VCC, but we're
      // unlikely to see VMEM use VCC.
      TII->insertNOPs(MI, 4);
    }

    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
              .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

const TargetRegisterClass *
SIRegisterInfo::getCFGStructurizerRegClass(MVT VT) const {
  switch (VT.SimpleTy) {
  default:
  case MVT::i32:
    return &AMDGPU::VGPR_32RegClass;
  }
}

unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
  return getEncodingValue(Reg) & 0xff;
}

const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
         getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                        const TargetRegisterClass *SRC) const {
  if (hasVGPRs(SRC)) {
    return SRC;
  } else if (SRC == &AMDGPU::SCCRegRegClass) {
    return &AMDGPU::VCCRegRegClass;
  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
    return &AMDGPU::VGPR_32RegClass;
  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
    return &AMDGPU::VReg_64RegClass;
  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
    return &AMDGPU::VReg_128RegClass;
  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
    return &AMDGPU::VReg_256RegClass;
  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
    return &AMDGPU::VReg_512RegClass;
  }
  return nullptr;
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                        const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // If this register has a sub-register, we can safely assume it is a 32-bit
  // register, because all of SI's sub-registers are 32-bit.
  if (isSGPRClass(RC)) {
    return &AMDGPU::SGPR_32RegClass;
  } else {
    return &AMDGPU::VGPR_32RegClass;
  }
}

unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                          const TargetRegisterClass *SubRC,
                                          unsigned Channel) const {

  switch (Reg) {
  case AMDGPU::VCC:
    switch (Channel) {
    case 0: return AMDGPU::VCC_LO;
    case 1: return AMDGPU::VCC_HI;
    default: llvm_unreachable("Invalid SubIdx for VCC");
    }

  case AMDGPU::FLAT_SCR:
    switch (Channel) {
    case 0:
      return AMDGPU::FLAT_SCR_LO;
    case 1:
      return AMDGPU::FLAT_SCR_HI;
    default:
      llvm_unreachable("Invalid SubIdx for FLAT_SCR");
    }
    break;

  case AMDGPU::EXEC:
    switch (Channel) {
    case 0:
      return AMDGPU::EXEC_LO;
    case 1:
      return AMDGPU::EXEC_HI;
    default:
      llvm_unreachable("Invalid SubIdx for EXEC");
    }
    break;
  }

  const TargetRegisterClass *RC = getPhysRegClass(Reg);
  // 32-bit registers don't have sub-registers, so we can just return the
  // Reg. We need to have this check here, because the calculation below
  // using getHWRegIndex() will fail with special 32-bit registers like
  // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
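  //
  // For example (assuming the usual encoding, where a register tuple such as
  // SGPR4_SGPR5 takes the hardware index of its first register, here 4):
  // getPhysRegSubReg(AMDGPU::SGPR4_SGPR5, &AMDGPU::SGPR_32RegClass, 1)
  // computes Index == 4 and returns SGPR_32RegClass.getRegister(4 + 1),
  // i.e. AMDGPU::SGPR5.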
  if (RC->getSize() == 4) {
    assert(Channel == 0);
    return Reg;
  }

  unsigned Index = getHWRegIndex(Reg);
  return SubRC->getRegister(Index + Channel);
}

bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
  return OpType == AMDGPU::OPERAND_REG_IMM32;
}

bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
  if (opCanUseLiteralConstant(OpType))
    return true;

  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}

unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  switch (Value) {
  case SIRegisterInfo::TGID_X:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
  case SIRegisterInfo::TGID_Y:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
  case SIRegisterInfo::TGID_Z:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
  case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
    if (MFI->getShaderType() != ShaderType::COMPUTE)
      return MFI->ScratchOffsetReg;
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
  case SIRegisterInfo::SCRATCH_PTR:
    return AMDGPU::SGPR2_SGPR3;
  case SIRegisterInfo::INPUT_PTR:
    return AMDGPU::SGPR0_SGPR1;
  case SIRegisterInfo::TIDIG_X:
    return AMDGPU::VGPR0;
  case SIRegisterInfo::TIDIG_Y:
    return AMDGPU::VGPR1;
  case SIRegisterInfo::TIDIG_Z:
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}

/// \brief Returns a register that is not used at any point in the function.
///        If all registers are used, then this function will return
///        AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                           const TargetRegisterClass *RC) const {

  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
       I != E; ++I) {
    if (!MRI.isPhysRegUsed(*I))
      return *I;
  }
  return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
  switch (WaveCount) {
  case 10: return 24;
  case 9:  return 28;
  case 8:  return 32;
  case 7:  return 36;
  case 6:  return 40;
  case 5:  return 48;
  case 4:  return 64;
  case 3:  return 84;
  case 2:  return 128;
  default: return 256;
  }
}

unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
                                            unsigned WaveCount) const {
  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    switch (WaveCount) {
    case 10: return 80;
    case 9:  return 80;
    case 8:  return 96;
    default: return 102;
    }
  } else {
    switch (WaveCount) {
    case 10: return 48;
    case 9:  return 56;
    case 8:  return 64;
    case 7:  return 72;
    case 6:  return 80;
    case 5:  return 96;
    default: return 103;
    }
  }
}