1 //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11 /// structures and waterfall loops.
12 ///
13 /// When we do structurization, we usually transform an if-else into two
14 /// successive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 ///    bb.if:
18 ///      %a = ...
19 ///      ...
20 ///    bb.then:
21 ///      ... = op %a
22 ///      ... // %a can be dead here
23 ///    bb.flow:
24 ///      ...
25 ///    bb.else:
26 ///      ... = %a
27 ///      ...
28 ///    bb.endif
29 ///
30 ///  As register allocator has no idea of the thread-control-flow, it will just
31 ///  assume %a would be alive in the whole range of bb.then because of a later
32 ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33 ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34 ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35 ///  after the last use in bb.then until the end of the block. The reason is
36 ///  the instructions in bb.then will only overwrite lanes that will never be
37 ///  accessed in bb.else.
38 ///
39 ///  This pass aims to tell register allocator that %a is in-fact dead,
40 ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41 ///  from bb.then, and then replace the uses in the bb.else with the result of
42 ///  newly inserted phi.
43 ///
44 ///  Two key conditions must be met to ensure correctness:
45 ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46 ///      sure the second loop iteration still get correct data.
47 ///  2.) There should be no further uses after the IF-ELSE region.
48 ///
49 ///
50 /// Waterfall loops get inserted around instructions that use divergent values
51 /// but can only be executed with a uniform value. For example an indirect call
52 /// to a divergent address:
53 ///    bb.start:
54 ///      %a = ...
55 ///      %fun = ...
56 ///      ...
57 ///    bb.loop:
58 ///      call %fun (%a)
59 ///      ... // %a can be dead here
60 ///      loop %bb.loop
61 ///
62 ///  The loop block is executed multiple times, but it is run exactly once for
63 ///  each active lane. Similar to the if-else case, the register allocator
64 ///  assumes that %a is live throughout the loop as it is used again in the next
65 ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66 ///  to be live after its last use in the loop block. By inserting a phi-node at
67 ///  the start of bb.loop that is undef when coming from bb.loop, the register
68 ///  allocation knows that the value of %a does not need to be preserved through
69 ///  iterations of the loop.
70 ///
71 //
72 //===----------------------------------------------------------------------===//
73 
74 #include "AMDGPU.h"
75 #include "GCNSubtarget.h"
76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77 #include "SIMachineFunctionInfo.h"
78 #include "llvm/CodeGen/LiveVariables.h"
79 #include "llvm/CodeGen/MachineDominators.h"
80 #include "llvm/CodeGen/MachineLoopInfo.h"
81 #include "llvm/CodeGen/TargetRegisterInfo.h"
82 #include "llvm/InitializePasses.h"
83 
84 using namespace llvm;
85 
86 #define DEBUG_TYPE "si-opt-vgpr-liverange"
87 
88 namespace {
89 
90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91 private:
92   const SIRegisterInfo *TRI = nullptr;
93   const SIInstrInfo *TII = nullptr;
94   LiveVariables *LV = nullptr;
95   MachineDominatorTree *MDT = nullptr;
96   const MachineLoopInfo *Loops = nullptr;
97   MachineRegisterInfo *MRI = nullptr;
98 
99 public:
100   static char ID;
101 
102   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103 
104   void collectElseRegionBlocks(MachineBasicBlock *Flow,
105                                MachineBasicBlock *Endif,
106                                SmallSetVector<MachineBasicBlock *, 16> &) const;
107 
108   void
109   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110                             MachineBasicBlock *Endif,
111                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112                             SmallVectorImpl<Register> &CandidateRegs) const;
113 
114   void collectWaterfallCandidateRegisters(
115       MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
116       SmallSetVector<Register, 16> &CandidateRegs,
117       SmallSetVector<MachineBasicBlock *, 2> &Blocks,
118       SmallVectorImpl<MachineInstr *> &Instructions) const;
119 
120   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121                              SmallVectorImpl<MachineInstr *> &Uses) const;
122 
123   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124                                    MachineBasicBlock *Flow) const;
125 
126   void updateLiveRangeInElseRegion(
127       Register Reg, Register NewReg, MachineBasicBlock *Flow,
128       MachineBasicBlock *Endif,
129       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130 
131   void
132   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135 
136   void optimizeWaterfallLiveRange(
137       Register Reg, MachineBasicBlock *LoopHeader,
138       SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
139       SmallVectorImpl<MachineInstr *> &Instructions) const;
140 
SIOptimizeVGPRLiveRange()141   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142 
143   bool runOnMachineFunction(MachineFunction &MF) override;
144 
getPassName() const145   StringRef getPassName() const override {
146     return "SI Optimize VGPR LiveRange";
147   }
148 
getAnalysisUsage(AnalysisUsage & AU) const149   void getAnalysisUsage(AnalysisUsage &AU) const override {
150     AU.addRequired<LiveVariables>();
151     AU.addRequired<MachineDominatorTree>();
152     AU.addRequired<MachineLoopInfo>();
153     AU.addPreserved<LiveVariables>();
154     AU.addPreserved<MachineDominatorTree>();
155     AU.addPreserved<MachineLoopInfo>();
156     MachineFunctionPass::getAnalysisUsage(AU);
157   }
158 
getRequiredProperties() const159   MachineFunctionProperties getRequiredProperties() const override {
160     return MachineFunctionProperties().set(
161         MachineFunctionProperties::Property::IsSSA);
162   }
163 
getClearedProperties() const164   MachineFunctionProperties getClearedProperties() const override {
165     return MachineFunctionProperties().set(
166         MachineFunctionProperties::Property::NoPHIs);
167   }
168 };
169 
170 } // end anonymous namespace
171 
172 // Check whether the MBB is a else flow block and get the branching target which
173 // is the Endif block
174 MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176   for (auto &BR : MBB->terminators()) {
177     if (BR.getOpcode() == AMDGPU::SI_ELSE)
178       return BR.getOperand(2).getMBB();
179   }
180   return nullptr;
181 }
182 
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186   assert(Flow != Endif);
187 
188   MachineBasicBlock *MBB = Endif;
189   unsigned Cur = 0;
190   while (MBB) {
191     for (auto *Pred : MBB->predecessors()) {
192       if (Pred != Flow && !Blocks.contains(Pred))
193         Blocks.insert(Pred);
194     }
195 
196     if (Cur < Blocks.size())
197       MBB = Blocks[Cur++];
198     else
199       MBB = nullptr;
200   }
201 
202   LLVM_DEBUG({
203     dbgs() << "Found Else blocks: ";
204     for (auto *MBB : Blocks)
205       dbgs() << printMBBReference(*MBB) << ' ';
206     dbgs() << '\n';
207   });
208 }
209 
210 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212     Register Reg, MachineBasicBlock *MBB,
213     SmallVectorImpl<MachineInstr *> &Uses) const {
214   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215     if (UseMI.getParent() == MBB && !UseMI.isPHI())
216       Uses.push_back(&UseMI);
217   }
218 }
219 
220 /// Collect the killed registers in the ELSE region which are not alive through
221 /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225     SmallVectorImpl<Register> &CandidateRegs) const {
226 
227   SmallSet<Register, 8> KillsInElse;
228 
229   for (auto *Else : ElseBlocks) {
230     for (auto &MI : Else->instrs()) {
231       if (MI.isDebugInstr())
232         continue;
233 
234       for (auto &MO : MI.operands()) {
235         if (!MO.isReg() || !MO.getReg() || MO.isDef())
236           continue;
237 
238         Register MOReg = MO.getReg();
239         // We can only optimize AGPR/VGPR virtual register
240         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241           continue;
242 
243         if (MO.readsReg()) {
244           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246           // Make sure two conditions are met:
247           // a.) the value is defined before/in the IF block
248           // b.) should be defined in the same loop-level.
249           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251             // Check if the register is live into the endif block. If not,
252             // consider it killed in the else region.
253             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255               KillsInElse.insert(MOReg);
256             } else {
257               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258                                 << " as Live in Endif\n");
259             }
260           }
261         }
262       }
263     }
264   }
265 
266   // Check the phis in the Endif, looking for value coming from the ELSE
267   // region. Make sure the phi-use is the last use.
268   for (auto &MI : Endif->phis()) {
269     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270       auto &MO = MI.getOperand(Idx);
271       auto *Pred = MI.getOperand(Idx + 1).getMBB();
272       if (Pred == Flow)
273         continue;
274       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275 
276       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277         continue;
278 
279       Register Reg = MO.getReg();
280       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281         continue;
282 
283       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284 
285       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287                           << " as Live in Endif\n");
288         continue;
289       }
290       // Make sure two conditions are met:
291       // a.) the value is defined before/in the IF block
292       // b.) should be defined in the same loop-level.
293       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296         KillsInElse.insert(Reg);
297     }
298   }
299 
300   auto IsLiveThroughThen = [&](Register Reg) {
301     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302          ++I) {
303       if (!I->readsReg())
304         continue;
305       auto *UseMI = I->getParent();
306       auto *UseMBB = UseMI->getParent();
307       if (UseMBB == Flow || UseMBB == Endif) {
308         if (!UseMI->isPHI())
309           return true;
310 
311         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312         // The register is live through the path If->Flow or Flow->Endif.
313         // we should not optimize for such cases.
314         if ((UseMBB == Flow && IncomingMBB != If) ||
315             (UseMBB == Endif && IncomingMBB == Flow))
316           return true;
317       }
318     }
319     return false;
320   };
321 
322   for (auto Reg : KillsInElse) {
323     if (!IsLiveThroughThen(Reg))
324       CandidateRegs.push_back(Reg);
325   }
326 }
327 
328 /// Collect the registers used in the waterfall loop block that are defined
329 /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
331     MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
332     SmallSetVector<Register, 16> &CandidateRegs,
333     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
334     SmallVectorImpl<MachineInstr *> &Instructions) const {
335 
336   // Collect loop instructions, potentially spanning multiple blocks
337   auto *MBB = LoopHeader;
338   for (;;) {
339     Blocks.insert(MBB);
340     for (auto &MI : *MBB) {
341       if (MI.isDebugInstr())
342         continue;
343       Instructions.push_back(&MI);
344     }
345     if (MBB == LoopEnd)
346       break;
347 
348     if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
349         (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
350       LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
351       return;
352     }
353 
354     MBB = *MBB->succ_begin();
355   }
356 
357   for (auto *I : Instructions) {
358     auto &MI = *I;
359 
360     for (auto &MO : MI.all_uses()) {
361       if (!MO.getReg())
362         continue;
363 
364       Register MOReg = MO.getReg();
365       // We can only optimize AGPR/VGPR virtual register
366       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367         continue;
368 
369       if (MO.readsReg()) {
370         MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371         // Make sure the value is defined before the LOOP block
372         if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373           // If the variable is used after the loop, the register coalescer will
374           // merge the newly created register and remove the phi node again.
375           // Just do nothing in that case.
376           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377           bool IsUsed = false;
378           for (auto *Succ : LoopEnd->successors()) {
379             if (!Blocks.contains(Succ) &&
380                 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381               IsUsed = true;
382               break;
383             }
384           }
385           if (!IsUsed) {
386             LLVM_DEBUG(dbgs() << "Found candidate reg: "
387                               << printReg(MOReg, TRI, 0, MRI) << '\n');
388             CandidateRegs.insert(MOReg);
389           } else {
390             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391                               << printReg(MOReg, TRI, 0, MRI) << '\n');
392           }
393         }
394       }
395     }
396   }
397 }
398 
399 // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
402   SetVector<MachineBasicBlock *> Blocks;
403   SmallVector<MachineBasicBlock *> WorkList({If});
404 
405   // Collect all successors until we see the flow block, where we should
406   // reconverge.
407   while (!WorkList.empty()) {
408     auto *MBB = WorkList.pop_back_val();
409     for (auto *Succ : MBB->successors()) {
410       if (Succ != Flow && !Blocks.contains(Succ)) {
411         WorkList.push_back(Succ);
412         Blocks.insert(Succ);
413       }
414     }
415   }
416 
417   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
418   for (MachineBasicBlock *MBB : Blocks) {
419     // Clear Live bit, as we will recalculate afterwards
420     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421                       << '\n');
422     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423   }
424 
425   SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
426 
427   // Get the blocks the Reg should be alive through
428   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429        ++I) {
430     auto *UseMI = I->getParent();
431     if (UseMI->isPHI() && I->readsReg()) {
432       if (Blocks.contains(UseMI->getParent()))
433         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434     }
435   }
436 
437   for (MachineBasicBlock *MBB : Blocks) {
438     SmallVector<MachineInstr *> Uses;
439     // PHI instructions has been processed before.
440     findNonPHIUsesInBlock(Reg, MBB, Uses);
441 
442     if (Uses.size() == 1) {
443       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444                         << printMBBReference(*MBB) << '\n');
445       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446     } else if (Uses.size() > 1) {
447       // Process the instructions in-order
448       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449                         << printMBBReference(*MBB) << '\n');
450       for (MachineInstr &MI : *MBB) {
451         if (llvm::is_contained(Uses, &MI))
452           LV->HandleVirtRegUse(Reg, MBB, MI);
453       }
454     }
455 
456     // Mark Reg alive through the block if this is a PHI incoming block
457     if (PHIIncoming.contains(MBB))
458       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459                                   MBB);
460   }
461 
462   // Set the isKilled flag if we get new Kills in the THEN region.
463   for (auto *MI : OldVarInfo.Kills) {
464     if (Blocks.contains(MI->getParent()))
465       MI->addRegisterKilled(Reg, TRI);
466   }
467 }
468 
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470     Register Reg, Register NewReg, MachineBasicBlock *Flow,
471     MachineBasicBlock *Endif,
472     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475 
476   // Transfer aliveBlocks from Reg to NewReg
477   for (auto *MBB : ElseBlocks) {
478     unsigned BBNum = MBB->getNumber();
479     if (OldVarInfo.AliveBlocks.test(BBNum)) {
480       NewVarInfo.AliveBlocks.set(BBNum);
481       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482                         << '\n');
483       OldVarInfo.AliveBlocks.reset(BBNum);
484     }
485   }
486 
487   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488   auto I = OldVarInfo.Kills.begin();
489   while (I != OldVarInfo.Kills.end()) {
490     if (ElseBlocks.contains((*I)->getParent())) {
491       NewVarInfo.Kills.push_back(*I);
492       I = OldVarInfo.Kills.erase(I);
493     } else {
494       ++I;
495     }
496   }
497 }
498 
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501     MachineBasicBlock *Endif,
502     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503   // Insert a new PHI, marking the value from the THEN region being
504   // undef.
505   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506   const auto *RC = MRI->getRegClass(Reg);
507   Register NewReg = MRI->createVirtualRegister(RC);
508   Register UndefReg = MRI->createVirtualRegister(RC);
509   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510                                     TII->get(TargetOpcode::PHI), NewReg);
511   for (auto *Pred : Flow->predecessors()) {
512     if (Pred == If)
513       PHI.addReg(Reg).addMBB(Pred);
514     else
515       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516   }
517 
518   // Replace all uses in the ELSE region or the PHIs in ENDIF block
519   // Use early increment range because setReg() will update the linked list.
520   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521     auto *UseMI = O.getParent();
522     auto *UseBlock = UseMI->getParent();
523     // Replace uses in Endif block
524     if (UseBlock == Endif) {
525       if (UseMI->isPHI())
526         O.setReg(NewReg);
527       else if (UseMI->isDebugInstr())
528         continue;
529       else {
530         // DetectDeadLanes may mark register uses as undef without removing
531         // them, in which case a non-phi instruction using the original register
532         // may exist in the Endif block even though the register is not live
533         // into it.
534         assert(!O.readsReg());
535       }
536       continue;
537     }
538 
539     // Replace uses in Else region
540     if (ElseBlocks.contains(UseBlock))
541       O.setReg(NewReg);
542   }
543 
544   // The optimized Reg is not alive through Flow blocks anymore.
545   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
546   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
547 
548   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
549   updateLiveRangeInThenRegion(Reg, If, Flow);
550 }
551 
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const552 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
553     Register Reg, MachineBasicBlock *LoopHeader,
554     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
555     SmallVectorImpl<MachineInstr *> &Instructions) const {
556   // Insert a new PHI, marking the value from the last loop iteration undef.
557   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
558   const auto *RC = MRI->getRegClass(Reg);
559   Register NewReg = MRI->createVirtualRegister(RC);
560   Register UndefReg = MRI->createVirtualRegister(RC);
561 
562   // Replace all uses in the LOOP region
563   // Use early increment range because setReg() will update the linked list.
564   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
565     auto *UseMI = O.getParent();
566     auto *UseBlock = UseMI->getParent();
567     // Replace uses in Loop blocks
568     if (Blocks.contains(UseBlock))
569       O.setReg(NewReg);
570   }
571 
572   MachineInstrBuilder PHI =
573       BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
574               TII->get(TargetOpcode::PHI), NewReg);
575   for (auto *Pred : LoopHeader->predecessors()) {
576     if (Blocks.contains(Pred))
577       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
578     else
579       PHI.addReg(Reg).addMBB(Pred);
580   }
581 
582   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
583   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
584 
585   // Find last use and mark as kill
586   MachineInstr *Kill = nullptr;
587   for (auto *MI : reverse(Instructions)) {
588     if (MI->readsRegister(NewReg, TRI)) {
589       MI->addRegisterKilled(NewReg, TRI);
590       NewVarInfo.Kills.push_back(MI);
591       Kill = MI;
592       break;
593     }
594   }
595   assert(Kill && "Failed to find last usage of register in loop");
596 
597   MachineBasicBlock *KillBlock = Kill->getParent();
598   bool PostKillBlock = false;
599   for (auto *Block : Blocks) {
600     auto BBNum = Block->getNumber();
601 
602     // collectWaterfallCandidateRegisters only collects registers that are dead
603     // after the loop. So we know that the old reg is no longer live throughout
604     // the waterfall loop.
605     OldVarInfo.AliveBlocks.reset(BBNum);
606 
607     // The new register is live up to (and including) the block that kills it.
608     PostKillBlock |= (Block == KillBlock);
609     if (PostKillBlock) {
610       NewVarInfo.AliveBlocks.reset(BBNum);
611     } else if (Block != LoopHeader) {
612       NewVarInfo.AliveBlocks.set(BBNum);
613     }
614   }
615 }
616 
617 char SIOptimizeVGPRLiveRange::ID = 0;
618 
619 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
620                       "SI Optimize VGPR LiveRange", false, false)
621 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
622 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
623 INITIALIZE_PASS_DEPENDENCY(LiveVariables)
624 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
625                     "SI Optimize VGPR LiveRange", false, false)
626 
627 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
628 
createSIOptimizeVGPRLiveRangePass()629 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
630   return new SIOptimizeVGPRLiveRange();
631 }
632 
runOnMachineFunction(MachineFunction & MF)633 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
634 
635   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
636   TII = ST.getInstrInfo();
637   TRI = &TII->getRegisterInfo();
638   MDT = &getAnalysis<MachineDominatorTree>();
639   Loops = &getAnalysis<MachineLoopInfo>();
640   LV = &getAnalysis<LiveVariables>();
641   MRI = &MF.getRegInfo();
642 
643   if (skipFunction(MF.getFunction()))
644     return false;
645 
646   bool MadeChange = false;
647 
648   // TODO: we need to think about the order of visiting the blocks to get
649   // optimal result for nesting if-else cases.
650   for (MachineBasicBlock &MBB : MF) {
651     for (auto &MI : MBB.terminators()) {
652       // Detect the if-else blocks
653       if (MI.getOpcode() == AMDGPU::SI_IF) {
654         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
655         auto *Endif = getElseTarget(IfTarget);
656         if (!Endif)
657           continue;
658 
659         // Skip unexpected control flow.
660         if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
661           continue;
662 
663         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
664         SmallVector<Register> CandidateRegs;
665 
666         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
667                           << printMBBReference(MBB) << ' '
668                           << printMBBReference(*IfTarget) << ' '
669                           << printMBBReference(*Endif) << '\n');
670 
671         // Collect all the blocks in the ELSE region
672         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
673 
674         // Collect the registers can be optimized
675         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
676                                   CandidateRegs);
677         MadeChange |= !CandidateRegs.empty();
678         // Now we are safe to optimize.
679         for (auto Reg : CandidateRegs)
680           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
681       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
682         auto *LoopHeader = MI.getOperand(0).getMBB();
683         auto *LoopEnd = &MBB;
684 
685         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
686                           << printMBBReference(*LoopHeader) << '\n');
687 
688         SmallSetVector<Register, 16> CandidateRegs;
689         SmallVector<MachineInstr *, 16> Instructions;
690         SmallSetVector<MachineBasicBlock *, 2> Blocks;
691 
692         collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
693                                            Blocks, Instructions);
694         MadeChange |= !CandidateRegs.empty();
695         // Now we are safe to optimize.
696         for (auto Reg : CandidateRegs)
697           optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
698       }
699     }
700   }
701 
702   return MadeChange;
703 }
704