1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11 /// structures and waterfall loops.
12 ///
13 /// When we do structurization, we usually transform an if-else into two
14 /// sucessive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 /// bb.if:
18 /// %a = ...
19 /// ...
20 /// bb.then:
21 /// ... = op %a
22 /// ... // %a can be dead here
23 /// bb.flow:
24 /// ...
25 /// bb.else:
26 /// ... = %a
27 /// ...
28 /// bb.endif
29 ///
30 /// As register allocator has no idea of the thread-control-flow, it will just
31 /// assume %a would be alive in the whole range of bb.then because of a later
32 /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33 /// to exec mask. For this if-else case, the lanes active in bb.then will be
34 /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35 /// after the last use in bb.then until the end of the block. The reason is
36 /// the instructions in bb.then will only overwrite lanes that will never be
37 /// accessed in bb.else.
38 ///
39 /// This pass aims to to tell register allocator that %a is in-fact dead,
40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming
41 /// from bb.then, and then replace the uses in the bb.else with the result of
42 /// newly inserted phi.
43 ///
44 /// Two key conditions must be met to ensure correctness:
45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make
46 /// sure the second loop iteration still get correct data.
47 /// 2.) There should be no further uses after the IF-ELSE region.
48 ///
49 ///
50 /// Waterfall loops get inserted around instructions that use divergent values
51 /// but can only be executed with a uniform value. For example an indirect call
52 /// to a divergent address:
53 /// bb.start:
54 /// %a = ...
55 /// %fun = ...
56 /// ...
57 /// bb.loop:
58 /// call %fun (%a)
59 /// ... // %a can be dead here
60 /// loop %bb.loop
61 ///
62 /// The loop block is executed multiple times, but it is run exactly once for
63 /// each active lane. Similar to the if-else case, the register allocator
64 /// assumes that %a is live throughout the loop as it is used again in the next
65 /// iteration. If %a is a VGPR that is unused after the loop, it does not need
66 /// to be live after its last use in the loop block. By inserting a phi-node at
67 /// the start of bb.loop that is undef when coming from bb.loop, the register
68 /// allocation knows that the value of %a does not need to be preserved through
69 /// iterations of the loop.
70 ///
71 //
72 //===----------------------------------------------------------------------===//
73
74 #include "AMDGPU.h"
75 #include "GCNSubtarget.h"
76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77 #include "SIMachineFunctionInfo.h"
78 #include "llvm/CodeGen/LiveVariables.h"
79 #include "llvm/CodeGen/MachineDominators.h"
80 #include "llvm/CodeGen/MachineLoopInfo.h"
81 #include "llvm/CodeGen/TargetRegisterInfo.h"
82 #include "llvm/InitializePasses.h"
83
84 using namespace llvm;
85
86 #define DEBUG_TYPE "si-opt-vgpr-liverange"
87
88 namespace {
89
90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91 private:
92 const SIRegisterInfo *TRI = nullptr;
93 const SIInstrInfo *TII = nullptr;
94 LiveVariables *LV = nullptr;
95 MachineDominatorTree *MDT = nullptr;
96 const MachineLoopInfo *Loops = nullptr;
97 MachineRegisterInfo *MRI = nullptr;
98
99 public:
100 static char ID;
101
102 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103
104 void collectElseRegionBlocks(MachineBasicBlock *Flow,
105 MachineBasicBlock *Endif,
106 SmallSetVector<MachineBasicBlock *, 16> &) const;
107
108 void
109 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110 MachineBasicBlock *Endif,
111 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112 SmallVectorImpl<Register> &CandidateRegs) const;
113
114 void collectWaterfallCandidateRegisters(
115 MachineBasicBlock *Loop,
116 SmallSetVector<Register, 16> &CandidateRegs) const;
117
118 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
119 SmallVectorImpl<MachineInstr *> &Uses) const;
120
121 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
122 MachineBasicBlock *Flow) const;
123
124 void updateLiveRangeInElseRegion(
125 Register Reg, Register NewReg, MachineBasicBlock *Flow,
126 MachineBasicBlock *Endif,
127 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
128
129 void
130 optimizeLiveRange(Register Reg, MachineBasicBlock *If,
131 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
132 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
133
134 void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const;
135
SIOptimizeVGPRLiveRange()136 SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
137
138 bool runOnMachineFunction(MachineFunction &MF) override;
139
getPassName() const140 StringRef getPassName() const override {
141 return "SI Optimize VGPR LiveRange";
142 }
143
getAnalysisUsage(AnalysisUsage & AU) const144 void getAnalysisUsage(AnalysisUsage &AU) const override {
145 AU.addRequired<LiveVariables>();
146 AU.addRequired<MachineDominatorTree>();
147 AU.addRequired<MachineLoopInfo>();
148 AU.addPreserved<LiveVariables>();
149 AU.addPreserved<MachineDominatorTree>();
150 AU.addPreserved<MachineLoopInfo>();
151 MachineFunctionPass::getAnalysisUsage(AU);
152 }
153
getRequiredProperties() const154 MachineFunctionProperties getRequiredProperties() const override {
155 return MachineFunctionProperties().set(
156 MachineFunctionProperties::Property::IsSSA);
157 }
158 };
159
160 } // end anonymous namespace
161
162 // Check whether the MBB is a else flow block and get the branching target which
163 // is the Endif block
164 MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const165 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
166 for (auto &BR : MBB->terminators()) {
167 if (BR.getOpcode() == AMDGPU::SI_ELSE)
168 return BR.getOperand(2).getMBB();
169 }
170 return nullptr;
171 }
172
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const173 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
174 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
175 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
176 assert(Flow != Endif);
177
178 MachineBasicBlock *MBB = Endif;
179 unsigned Cur = 0;
180 while (MBB) {
181 for (auto *Pred : MBB->predecessors()) {
182 if (Pred != Flow && !Blocks.contains(Pred))
183 Blocks.insert(Pred);
184 }
185
186 if (Cur < Blocks.size())
187 MBB = Blocks[Cur++];
188 else
189 MBB = nullptr;
190 }
191
192 LLVM_DEBUG({
193 dbgs() << "Found Else blocks: ";
194 for (auto *MBB : Blocks)
195 dbgs() << printMBBReference(*MBB) << ' ';
196 dbgs() << '\n';
197 });
198 }
199
200 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const201 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
202 Register Reg, MachineBasicBlock *MBB,
203 SmallVectorImpl<MachineInstr *> &Uses) const {
204 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
205 if (UseMI.getParent() == MBB && !UseMI.isPHI())
206 Uses.push_back(&UseMI);
207 }
208 }
209
210 /// Collect the killed registers in the ELSE region which are not alive through
211 /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const212 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
213 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
214 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
215 SmallVectorImpl<Register> &CandidateRegs) const {
216
217 SmallSet<Register, 8> KillsInElse;
218
219 for (auto *Else : ElseBlocks) {
220 for (auto &MI : Else->instrs()) {
221 if (MI.isDebugInstr())
222 continue;
223
224 for (auto &MO : MI.operands()) {
225 if (!MO.isReg() || !MO.getReg() || MO.isDef())
226 continue;
227
228 Register MOReg = MO.getReg();
229 // We can only optimize AGPR/VGPR virtual register
230 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
231 continue;
232
233 if (MO.readsReg()) {
234 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
235 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
236 // Make sure two conditions are met:
237 // a.) the value is defined before/in the IF block
238 // b.) should be defined in the same loop-level.
239 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
240 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
241 // Check if the register is live into the endif block. If not,
242 // consider it killed in the else region.
243 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
244 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
245 KillsInElse.insert(MOReg);
246 } else {
247 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
248 << " as Live in Endif\n");
249 }
250 }
251 }
252 }
253 }
254 }
255
256 // Check the phis in the Endif, looking for value coming from the ELSE
257 // region. Make sure the phi-use is the last use.
258 for (auto &MI : Endif->phis()) {
259 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
260 auto &MO = MI.getOperand(Idx);
261 auto *Pred = MI.getOperand(Idx + 1).getMBB();
262 if (Pred == Flow)
263 continue;
264 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
265
266 if (!MO.isReg() || !MO.getReg() || MO.isUndef())
267 continue;
268
269 Register Reg = MO.getReg();
270 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
271 continue;
272
273 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
274
275 if (VI.isLiveIn(*Endif, Reg, *MRI)) {
276 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
277 << " as Live in Endif\n");
278 continue;
279 }
280 // Make sure two conditions are met:
281 // a.) the value is defined before/in the IF block
282 // b.) should be defined in the same loop-level.
283 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
284 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
285 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
286 KillsInElse.insert(Reg);
287 }
288 }
289
290 auto IsLiveThroughThen = [&](Register Reg) {
291 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
292 ++I) {
293 if (!I->readsReg())
294 continue;
295 auto *UseMI = I->getParent();
296 auto *UseMBB = UseMI->getParent();
297 if (UseMBB == Flow || UseMBB == Endif) {
298 if (!UseMI->isPHI())
299 return true;
300
301 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
302 // The register is live through the path If->Flow or Flow->Endif.
303 // we should not optimize for such cases.
304 if ((UseMBB == Flow && IncomingMBB != If) ||
305 (UseMBB == Endif && IncomingMBB == Flow))
306 return true;
307 }
308 }
309 return false;
310 };
311
312 for (auto Reg : KillsInElse) {
313 if (!IsLiveThroughThen(Reg))
314 CandidateRegs.push_back(Reg);
315 }
316 }
317
318 /// Collect the registers used in the waterfall loop block that are defined
319 /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * Loop,SmallSetVector<Register,16> & CandidateRegs) const320 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
321 MachineBasicBlock *Loop,
322 SmallSetVector<Register, 16> &CandidateRegs) const {
323
324 for (auto &MI : Loop->instrs()) {
325 if (MI.isDebugInstr())
326 continue;
327
328 for (auto &MO : MI.operands()) {
329 if (!MO.isReg() || !MO.getReg() || MO.isDef())
330 continue;
331
332 Register MOReg = MO.getReg();
333 // We can only optimize AGPR/VGPR virtual register
334 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
335 continue;
336
337 if (MO.readsReg()) {
338 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
339 // Make sure the value is defined before the LOOP block
340 if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) {
341 // If the variable is used after the loop, the register coalescer will
342 // merge the newly created register and remove the phi node again.
343 // Just do nothing in that case.
344 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
345 bool IsUsed = false;
346 for (auto *Succ : Loop->successors()) {
347 if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
348 IsUsed = true;
349 break;
350 }
351 }
352 if (!IsUsed) {
353 LLVM_DEBUG(dbgs() << "Found candidate reg: "
354 << printReg(MOReg, TRI, 0, MRI) << '\n');
355 CandidateRegs.insert(MOReg);
356 } else {
357 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
358 << printReg(MOReg, TRI, 0, MRI) << '\n');
359 }
360 }
361 }
362 }
363 }
364 }
365
366 // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const367 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
368 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
369
370 SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
371
372 MachineBasicBlock *ThenEntry = nullptr;
373 for (auto *Succ : If->successors()) {
374 if (Succ != Flow) {
375 ThenEntry = Succ;
376 break;
377 }
378 }
379 assert(ThenEntry && "No successor in Then region?");
380
381 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
382 df_iterator_default_set<MachineBasicBlock *, 16> Visited;
383
384 for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
385 if (MBB == Flow)
386 break;
387
388 // Clear Live bit, as we will recalculate afterwards
389 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
390 << '\n');
391 OldVarInfo.AliveBlocks.reset(MBB->getNumber());
392 }
393
394 // Get the blocks the Reg should be alive through
395 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
396 ++I) {
397 auto *UseMI = I->getParent();
398 if (UseMI->isPHI() && I->readsReg()) {
399 if (Visited.contains(UseMI->getParent()))
400 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
401 }
402 }
403
404 Visited.clear();
405
406 for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
407 if (MBB == Flow)
408 break;
409
410 SmallVector<MachineInstr *> Uses;
411 // PHI instructions has been processed before.
412 findNonPHIUsesInBlock(Reg, MBB, Uses);
413
414 if (Uses.size() == 1) {
415 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
416 << printMBBReference(*MBB) << '\n');
417 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
418 } else if (Uses.size() > 1) {
419 // Process the instructions in-order
420 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
421 << printMBBReference(*MBB) << '\n');
422 for (MachineInstr &MI : *MBB) {
423 if (llvm::is_contained(Uses, &MI))
424 LV->HandleVirtRegUse(Reg, MBB, MI);
425 }
426 }
427
428 // Mark Reg alive through the block if this is a PHI incoming block
429 if (PHIIncoming.contains(MBB))
430 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
431 MBB);
432 }
433
434 // Set the isKilled flag if we get new Kills in the THEN region.
435 for (auto *MI : OldVarInfo.Kills) {
436 if (Visited.contains(MI->getParent()))
437 MI->addRegisterKilled(Reg, TRI);
438 }
439 }
440
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const441 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
442 Register Reg, Register NewReg, MachineBasicBlock *Flow,
443 MachineBasicBlock *Endif,
444 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
445 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
446 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
447
448 // Transfer aliveBlocks from Reg to NewReg
449 for (auto *MBB : ElseBlocks) {
450 unsigned BBNum = MBB->getNumber();
451 if (OldVarInfo.AliveBlocks.test(BBNum)) {
452 NewVarInfo.AliveBlocks.set(BBNum);
453 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
454 << '\n');
455 OldVarInfo.AliveBlocks.reset(BBNum);
456 }
457 }
458
459 // Transfer the possible Kills in ElseBlocks from Reg to NewReg
460 auto I = OldVarInfo.Kills.begin();
461 while (I != OldVarInfo.Kills.end()) {
462 if (ElseBlocks.contains((*I)->getParent())) {
463 NewVarInfo.Kills.push_back(*I);
464 I = OldVarInfo.Kills.erase(I);
465 } else {
466 ++I;
467 }
468 }
469 }
470
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const471 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
472 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
473 MachineBasicBlock *Endif,
474 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
475 // Insert a new PHI, marking the value from the THEN region being
476 // undef.
477 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
478 const auto *RC = MRI->getRegClass(Reg);
479 Register NewReg = MRI->createVirtualRegister(RC);
480 Register UndefReg = MRI->createVirtualRegister(RC);
481 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
482 TII->get(TargetOpcode::PHI), NewReg);
483 for (auto *Pred : Flow->predecessors()) {
484 if (Pred == If)
485 PHI.addReg(Reg).addMBB(Pred);
486 else
487 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
488 }
489
490 // Replace all uses in the ELSE region or the PHIs in ENDIF block
491 // Use early increment range because setReg() will update the linked list.
492 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
493 auto *UseMI = O.getParent();
494 auto *UseBlock = UseMI->getParent();
495 // Replace uses in Endif block
496 if (UseBlock == Endif) {
497 assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
498 O.setReg(NewReg);
499 continue;
500 }
501
502 // Replace uses in Else region
503 if (ElseBlocks.contains(UseBlock))
504 O.setReg(NewReg);
505 }
506
507 // The optimized Reg is not alive through Flow blocks anymore.
508 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
509 OldVarInfo.AliveBlocks.reset(Flow->getNumber());
510
511 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
512 updateLiveRangeInThenRegion(Reg, If, Flow);
513 }
514
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * Loop) const515 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
516 Register Reg, MachineBasicBlock *Loop) const {
517 // Insert a new PHI, marking the value from the last loop iteration undef.
518 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
519 const auto *RC = MRI->getRegClass(Reg);
520 Register NewReg = MRI->createVirtualRegister(RC);
521 Register UndefReg = MRI->createVirtualRegister(RC);
522
523 // Replace all uses in the LOOP region
524 // Use early increment range because setReg() will update the linked list.
525 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
526 auto *UseMI = O.getParent();
527 auto *UseBlock = UseMI->getParent();
528 // Replace uses in Loop block
529 if (UseBlock == Loop)
530 O.setReg(NewReg);
531 }
532
533 MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
534 TII->get(TargetOpcode::PHI), NewReg);
535 for (auto *Pred : Loop->predecessors()) {
536 if (Pred == Loop)
537 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
538 else
539 PHI.addReg(Reg).addMBB(Pred);
540 }
541
542 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
543 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
544
545 // collectWaterfallCandidateRegisters only collects registers that are dead
546 // after the loop. So we know that the old reg is not live throughout the
547 // whole block anymore.
548 OldVarInfo.AliveBlocks.reset(Loop->getNumber());
549
550 // Mark the last use as kill
551 for (auto &MI : reverse(Loop->instrs())) {
552 if (MI.readsRegister(NewReg, TRI)) {
553 MI.addRegisterKilled(NewReg, TRI);
554 NewVarInfo.Kills.push_back(&MI);
555 break;
556 }
557 }
558 assert(!NewVarInfo.Kills.empty() &&
559 "Failed to find last usage of register in loop");
560 }
561
562 char SIOptimizeVGPRLiveRange::ID = 0;
563
564 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
565 "SI Optimize VGPR LiveRange", false, false)
566 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
567 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
568 INITIALIZE_PASS_DEPENDENCY(LiveVariables)
569 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
570 "SI Optimize VGPR LiveRange", false, false)
571
572 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
573
createSIOptimizeVGPRLiveRangePass()574 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
575 return new SIOptimizeVGPRLiveRange();
576 }
577
runOnMachineFunction(MachineFunction & MF)578 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
579
580 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
581 TII = ST.getInstrInfo();
582 TRI = &TII->getRegisterInfo();
583 MDT = &getAnalysis<MachineDominatorTree>();
584 Loops = &getAnalysis<MachineLoopInfo>();
585 LV = &getAnalysis<LiveVariables>();
586 MRI = &MF.getRegInfo();
587
588 if (skipFunction(MF.getFunction()))
589 return false;
590
591 bool MadeChange = false;
592
593 // TODO: we need to think about the order of visiting the blocks to get
594 // optimal result for nesting if-else cases.
595 for (MachineBasicBlock &MBB : MF) {
596 for (auto &MI : MBB.terminators()) {
597 // Detect the if-else blocks
598 if (MI.getOpcode() == AMDGPU::SI_IF) {
599 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
600 auto *Endif = getElseTarget(IfTarget);
601 if (!Endif)
602 continue;
603
604 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
605 SmallVector<Register> CandidateRegs;
606
607 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
608 << printMBBReference(MBB) << ' '
609 << printMBBReference(*IfTarget) << ' '
610 << printMBBReference(*Endif) << '\n');
611
612 // Collect all the blocks in the ELSE region
613 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
614
615 // Collect the registers can be optimized
616 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
617 CandidateRegs);
618 MadeChange |= !CandidateRegs.empty();
619 // Now we are safe to optimize.
620 for (auto Reg : CandidateRegs)
621 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
622 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
623 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
624 << printMBBReference(MBB) << '\n');
625
626 SmallSetVector<Register, 16> CandidateRegs;
627 collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
628 MadeChange |= !CandidateRegs.empty();
629 // Now we are safe to optimize.
630 for (auto Reg : CandidateRegs)
631 optimizeWaterfallLiveRange(Reg, &MBB);
632 }
633 }
634 }
635
636 return MadeChange;
637 }
638