1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14
15
16 #include "SIRegisterInfo.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24
25 using namespace llvm;
26
SIRegisterInfo(const AMDGPUSubtarget & st)27 SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
28 : AMDGPURegisterInfo(st)
29 { }
30
getReservedRegs(const MachineFunction & MF) const31 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
32 BitVector Reserved(getNumRegs());
33 Reserved.set(AMDGPU::EXEC);
34
35 // EXEC_LO and EXEC_HI could be allocated and used as regular register,
36 // but this seems likely to result in bugs, so I'm marking them as reserved.
37 Reserved.set(AMDGPU::EXEC_LO);
38 Reserved.set(AMDGPU::EXEC_HI);
39
40 Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
41 Reserved.set(AMDGPU::FLAT_SCR);
42 Reserved.set(AMDGPU::FLAT_SCR_LO);
43 Reserved.set(AMDGPU::FLAT_SCR_HI);
44
45 // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
46 Reserved.set(AMDGPU::VGPR255);
47 Reserved.set(AMDGPU::VGPR254);
48
49 // Tonga and Iceland can only allocate a fixed number of SGPRs due
50 // to a hw bug.
51 if (ST.hasSGPRInitBug()) {
52 unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
53 // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
54 // Assume XNACK_MASK is unused.
55 unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
56
57 for (unsigned i = Limit; i < NumSGPRs; ++i) {
58 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
59 MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
60
61 for (; R.isValid(); ++R)
62 Reserved.set(*R);
63 }
64 }
65
66 return Reserved;
67 }
68
getRegPressureSetLimit(unsigned Idx) const69 unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
70
71 // FIXME: We should adjust the max number of waves based on LDS size.
72 unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
73 ST.getMaxWavesPerCU());
74 unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
75
76 for (regclass_iterator I = regclass_begin(), E = regclass_end();
77 I != E; ++I) {
78
79 unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
80 unsigned Limit;
81
82 if (isSGPRClass(*I)) {
83 Limit = SGPRLimit / NumSubRegs;
84 } else {
85 Limit = VGPRLimit / NumSubRegs;
86 }
87
88 const int *Sets = getRegClassPressureSets(*I);
89 assert(Sets);
90 for (unsigned i = 0; Sets[i] != -1; ++i) {
91 if (Sets[i] == (int)Idx)
92 return Limit;
93 }
94 }
95 return 256;
96 }
97
requiresRegisterScavenging(const MachineFunction & Fn) const98 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
99 return Fn.getFrameInfo()->hasStackObjects();
100 }
101
/// \brief Return the number of 32-bit sub-registers a spill pseudo operates
/// on, i.e. the size in dwords of the register tuple being saved/restored.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}
136
/// \brief Emit the per-dword buffer instructions implementing a VGPR spill
/// (store) or reload (load) through scratch memory.
///
/// \param LoadStoreOp BUFFER_LOAD_DWORD_OFFSET or BUFFER_STORE_DWORD_OFFSET;
///        whether this is a load is derived from the opcode's mayLoad flag.
/// \param Value the VGPR (or VGPR tuple) being spilled/filled.
/// \param ScratchRsrcReg register holding the scratch buffer resource.
/// \param ScratchOffset SGPR holding the wave's scratch base offset.
/// \param Offset byte offset of the frame object within scratch.
/// \param RS scavenger used for a temporary SGPR when the offset does not
///        fit in the instruction's immediate field.
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           unsigned Value,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  MachineBasicBlock *MBB = MI->getParent();
  const MachineFunction *MF = MI->getParent()->getParent();
  LLVMContext &Ctx = MF->getFunction()->getContext();
  DebugLoc DL = MI->getDebugLoc();
  bool IsLoad = TII->get(LoadStoreOp).mayLoad();

  bool RanOutOfSGPRs = false;
  unsigned SOffset = ScratchOffset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  // The buffer instruction's immediate offset field is 12-bit unsigned. If
  // the last dword's offset would not fit, fold the byte offset into a
  // scavenged SGPR and use a zero immediate instead.
  if (!isUInt<12>(Offset + Size)) {
    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
    if (SOffset == AMDGPU::NoRegister) {
      // No SGPR available: fall back to SGPR0 (may clobber a live value;
      // diagnosed below) so we can still emit something.
      RanOutOfSGPRs = true;
      SOffset = AMDGPU::SGPR0;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    Offset = 0;
  }

  if (RanOutOfSGPRs)
    Ctx.emitError("Ran out of SGPRs for spilling VGPRS");

  // Emit one dword load/store per 32-bit sub-register, 4 bytes apart.
  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs > 1 ?
        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
        Value;
    // Kill the rsrc operand on the last instruction of the sequence.
    bool IsKill = (i == e - 1);

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .addReg(SubReg, getDefRegState(IsLoad))
      .addReg(ScratchRsrcReg, getKillRegState(IsKill))
      .addImm(Offset)
      .addReg(SOffset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      // Tie the full tuple to each piece so liveness sees the whole Value.
      .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
  }
}
190
/// \brief Rewrite a frame-index operand into real registers and offsets.
///
/// Spill pseudos are expanded here: SGPR saves/restores become
/// V_WRITELANE/V_READLANE into the VGPR lanes pre-assigned by
/// SIMachineFunctionInfo, and VGPR saves/restores become scratch buffer
/// stores/loads via buildScratchLoadStore. Any other frame index is folded
/// to an immediate offset, materialized into a scavenged VGPR when the
/// immediate is not legal for that operand.
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    // Write each 32-bit piece of the SGPR tuple into its assigned VGPR lane.
    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
              MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
         LLVMContext &Ctx = MF->getFunction()->getContext();
         Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
              .addReg(SubReg)
              .addImm(Spill.Lane);

    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    // Read each 32-bit piece back out of its assigned VGPR lane.
    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      bool isM0 = SubReg == AMDGPU::M0;
      struct SIMachineFunctionInfo::SpilledReg Spill =
              MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
         LLVMContext &Ctx = MF->getFunction()->getContext();
         Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      // M0 is not read back directly: readlane into a scavenged SGPR and
      // then copy into M0 with S_MOV_B32 below.
      if (isM0)
        SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
              .addReg(Spill.VGPR)
              .addImm(Spill.Lane)
              .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
      if (isM0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
          .addReg(SubReg);
      }
    }

    // TODO: only do this when it is needed
    switch (ST.getGeneration()) {
    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
      TII->insertNOPs(MI, 3);
      break;
    case AMDGPUSubtarget::SEA_ISLANDS:
      break;
    default: // VOLCANIC_ISLANDS and later
      // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
      // and later. This also applies to VALUs which write VCC, but we're
      // unlikely to see VMEM use VCC.
      TII->insertNOPs(MI, 4);
    }

    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
          FrameInfo->getObjectOffset(Index), RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    // Plain frame index: fold to an immediate byte offset; if the operand
    // cannot legally hold that immediate, materialize it in a scavenged VGPR.
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
              .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}
331
getCFGStructurizerRegClass(MVT VT) const332 const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
333 MVT VT) const {
334 switch(VT.SimpleTy) {
335 default:
336 case MVT::i32: return &AMDGPU::VGPR_32RegClass;
337 }
338 }
339
getHWRegIndex(unsigned Reg) const340 unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
341 return getEncodingValue(Reg) & 0xff;
342 }
343
getPhysRegClass(unsigned Reg) const344 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
345 assert(!TargetRegisterInfo::isVirtualRegister(Reg));
346
347 static const TargetRegisterClass *BaseClasses[] = {
348 &AMDGPU::VGPR_32RegClass,
349 &AMDGPU::SReg_32RegClass,
350 &AMDGPU::VReg_64RegClass,
351 &AMDGPU::SReg_64RegClass,
352 &AMDGPU::VReg_96RegClass,
353 &AMDGPU::VReg_128RegClass,
354 &AMDGPU::SReg_128RegClass,
355 &AMDGPU::VReg_256RegClass,
356 &AMDGPU::SReg_256RegClass,
357 &AMDGPU::VReg_512RegClass
358 };
359
360 for (const TargetRegisterClass *BaseClass : BaseClasses) {
361 if (BaseClass->contains(Reg)) {
362 return BaseClass;
363 }
364 }
365 return nullptr;
366 }
367
hasVGPRs(const TargetRegisterClass * RC) const368 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
369 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
370 getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
371 getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
372 getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
373 getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
374 getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
375 }
376
/// Map a scalar register class to the vector class of the same width, e.g.
/// SReg_64 -> VReg_64. Classes that already hold VGPRs are returned as-is;
/// unknown classes yield null.
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  if (hasVGPRs(SRC))
    return SRC;

  if (SRC == &AMDGPU::SCCRegRegClass)
    return &AMDGPU::VCCRegRegClass;
  if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass))
    return &AMDGPU::VGPR_32RegClass;
  if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass))
    return &AMDGPU::VReg_64RegClass;
  if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass))
    return &AMDGPU::VReg_128RegClass;
  if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass))
    return &AMDGPU::VReg_256RegClass;
  if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass))
    return &AMDGPU::VReg_512RegClass;

  return nullptr;
}
396
/// Return the class of the 32-bit pieces of \p RC, or \p RC itself when no
/// sub-register index is requested.
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // All of SI's sub-registers are 32 bits wide, so any register with
  // sub-registers decomposes into 32-bit pieces of the matching bank.
  return isSGPRClass(RC) ? &AMDGPU::SGPR_32RegClass
                         : &AMDGPU::VGPR_32RegClass;
}
410
/// \brief Return the physical register corresponding to channel \p Channel
/// of \p Reg, taken from class \p SubRC.
///
/// Special 64-bit registers (VCC, FLAT_SCR, EXEC) have named halves and are
/// handled explicitly; everything else is resolved via the hardware register
/// index.
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                          const TargetRegisterClass *SubRC,
                                          unsigned Channel) const {

  switch (Reg) {
    case AMDGPU::VCC:
      switch(Channel) {
        case 0: return AMDGPU::VCC_LO;
        case 1: return AMDGPU::VCC_HI;
        default: llvm_unreachable("Invalid SubIdx for VCC");
      }

  case AMDGPU::FLAT_SCR:
    switch (Channel) {
    case 0:
      return AMDGPU::FLAT_SCR_LO;
    case 1:
      return AMDGPU::FLAT_SCR_HI;
    default:
      llvm_unreachable("Invalid SubIdx for FLAT_SCR");
    }
    break;

  case AMDGPU::EXEC:
    switch (Channel) {
    case 0:
      return AMDGPU::EXEC_LO;
    case 1:
      return AMDGPU::EXEC_HI;
    default:
      llvm_unreachable("Invalid SubIdx for EXEC");
    }
    break;
  }

  const TargetRegisterClass *RC = getPhysRegClass(Reg);
  // 32-bit registers don't have sub-registers, so we can just return the
  // Reg.  We need to have this check here, because the calculation below
  // using getHWRegIndex() will fail with special 32-bit registers like
  // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
  if (RC->getSize() == 4) {
    assert(Channel == 0);
    return Reg;
  }

  // General case: index into SubRC at the register's hardware index plus
  // the requested channel.
  unsigned Index = getHWRegIndex(Reg);
  return SubRC->getRegister(Index + Channel);
}
459
opCanUseLiteralConstant(unsigned OpType) const460 bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
461 return OpType == AMDGPU::OPERAND_REG_IMM32;
462 }
463
opCanUseInlineConstant(unsigned OpType) const464 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
465 if (opCanUseLiteralConstant(OpType))
466 return true;
467
468 return OpType == AMDGPU::OPERAND_REG_INLINE_C;
469 }
470
/// \brief Return the physical register the hardware preloads \p Value into
/// on kernel entry.
///
/// SGPR-resident values (workgroup IDs, scratch wave offset, pointers) are
/// located relative to the number of user SGPRs recorded in
/// SIMachineFunctionInfo; workitem IDs live in fixed VGPRs.
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  switch (Value) {
  case SIRegisterInfo::TGID_X:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
  case SIRegisterInfo::TGID_Y:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
  case SIRegisterInfo::TGID_Z:
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
  case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
    // Graphics shaders record the offset register explicitly; compute
    // kernels use a fixed slot after the user SGPRs.
    if (MFI->getShaderType() != ShaderType::COMPUTE)
      return MFI->ScratchOffsetReg;
    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
  case SIRegisterInfo::SCRATCH_PTR:
    return AMDGPU::SGPR2_SGPR3;
  case SIRegisterInfo::INPUT_PTR:
    return AMDGPU::SGPR0_SGPR1;
  case SIRegisterInfo::TIDIG_X:
    return AMDGPU::VGPR0;
  case SIRegisterInfo::TIDIG_Y:
    return AMDGPU::VGPR1;
  case SIRegisterInfo::TIDIG_Z:
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}
499
500 /// \brief Returns a register that is not used at any point in the function.
501 /// If all registers are used, then this function will return
502 // AMDGPU::NoRegister.
findUnusedRegister(const MachineRegisterInfo & MRI,const TargetRegisterClass * RC) const503 unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
504 const TargetRegisterClass *RC) const {
505
506 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
507 I != E; ++I) {
508 if (!MRI.isPhysRegUsed(*I))
509 return *I;
510 }
511 return AMDGPU::NoRegister;
512 }
513
/// \brief Return the number of VGPRs a kernel may use while still sustaining
/// \p WaveCount waves per SIMD; the VGPR file is shared among resident
/// waves, so more waves means fewer registers each.
unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
  switch(WaveCount) {
    case 10: return 24;
    case 9:  return 28;
    case 8:  return 32;
    case 7:  return 36;
    case 6:  return 40;
    case 5:  return 48;
    case 4:  return 64;
    case 3:  return 84;
    case 2:  return 128;
    default: return 256;  // A single wave may use the entire VGPR file.
  }
}
528
/// \brief Return the number of SGPRs a kernel may use while still sustaining
/// \p WaveCount waves per SIMD on generation \p gen. VI-class hardware has
/// different SGPR banking than SI/CI, hence the separate tables.
unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
                                            unsigned WaveCount) const {
  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    switch (WaveCount) {
      case 10: return 80;
      case 9:  return 80;
      case 8:  return 96;
      default: return 102;
    }
  } else {
    switch(WaveCount) {
      case 10: return 48;
      case 9:  return 56;
      case 8:  return 64;
      case 7:  return 72;
      case 6:  return 80;
      case 5:  return 96;
      default: return 103;
    }
  }
}
550