1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass implements instruction packetization for R600. It unsets the
/// isLast bit of instructions inside a bundle and substitutes source registers
/// with PreviousVector registers when applicable.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "R600Subtarget.h"
19 #include "llvm/CodeGen/DFAPacketizer.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "packets"
27 
28 namespace {
29 
30 class R600Packetizer : public MachineFunctionPass {
31 
32 public:
33   static char ID;
R600Packetizer()34   R600Packetizer() : MachineFunctionPass(ID) {}
35 
getAnalysisUsage(AnalysisUsage & AU) const36   void getAnalysisUsage(AnalysisUsage &AU) const override {
37     AU.setPreservesCFG();
38     AU.addRequired<MachineDominatorTree>();
39     AU.addPreserved<MachineDominatorTree>();
40     AU.addRequired<MachineLoopInfo>();
41     AU.addPreserved<MachineLoopInfo>();
42     MachineFunctionPass::getAnalysisUsage(AU);
43   }
44 
getPassName() const45   StringRef getPassName() const override { return "R600 Packetizer"; }
46 
47   bool runOnMachineFunction(MachineFunction &Fn) override;
48 };
49 
50 class R600PacketizerList : public VLIWPacketizerList {
51 private:
52   const R600InstrInfo *TII;
53   const R600RegisterInfo &TRI;
54   bool VLIW5;
55   bool ConsideredInstUsesAlreadyWrittenVectorElement;
56 
getSlot(const MachineInstr & MI) const57   unsigned getSlot(const MachineInstr &MI) const {
58     return TRI.getHWRegChan(MI.getOperand(0).getReg());
59   }
60 
61   /// \returns register to PV chan mapping for bundle/single instructions that
62   /// immediately precedes I.
getPreviousVector(MachineBasicBlock::iterator I) const63   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
64       const {
65     DenseMap<unsigned, unsigned> Result;
66     I--;
67     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
68       return Result;
69     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
70     if (I->isBundle())
71       BI++;
72     int LastDstChan = -1;
73     do {
74       bool isTrans = false;
75       int BISlot = getSlot(*BI);
76       if (LastDstChan >= BISlot)
77         isTrans = true;
78       LastDstChan = BISlot;
79       if (TII->isPredicated(*BI))
80         continue;
81       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
82       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
83         continue;
84       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
85       if (DstIdx == -1) {
86         continue;
87       }
88       Register Dst = BI->getOperand(DstIdx).getReg();
89       if (isTrans || TII->isTransOnly(*BI)) {
90         Result[Dst] = R600::PS;
91         continue;
92       }
93       if (BI->getOpcode() == R600::DOT4_r600 ||
94           BI->getOpcode() == R600::DOT4_eg) {
95         Result[Dst] = R600::PV_X;
96         continue;
97       }
98       if (Dst == R600::OQAP) {
99         continue;
100       }
101       unsigned PVReg = 0;
102       switch (TRI.getHWRegChan(Dst)) {
103       case 0:
104         PVReg = R600::PV_X;
105         break;
106       case 1:
107         PVReg = R600::PV_Y;
108         break;
109       case 2:
110         PVReg = R600::PV_Z;
111         break;
112       case 3:
113         PVReg = R600::PV_W;
114         break;
115       default:
116         llvm_unreachable("Invalid Chan");
117       }
118       Result[Dst] = PVReg;
119     } while ((++BI)->isBundledWithPred());
120     return Result;
121   }
122 
substitutePV(MachineInstr & MI,const DenseMap<unsigned,unsigned> & PVs) const123   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
124       const {
125     unsigned Ops[] = {
126       R600::OpName::src0,
127       R600::OpName::src1,
128       R600::OpName::src2
129     };
130     for (unsigned i = 0; i < 3; i++) {
131       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
132       if (OperandIdx < 0)
133         continue;
134       Register Src = MI.getOperand(OperandIdx).getReg();
135       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
136       if (It != PVs.end())
137         MI.getOperand(OperandIdx).setReg(It->second);
138     }
139   }
140 public:
141   // Ctor.
R600PacketizerList(MachineFunction & MF,const R600Subtarget & ST,MachineLoopInfo & MLI)142   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
143                      MachineLoopInfo &MLI)
144       : VLIWPacketizerList(MF, MLI, nullptr),
145         TII(ST.getInstrInfo()),
146         TRI(TII->getRegisterInfo()) {
147     VLIW5 = !ST.hasCaymanISA();
148   }
149 
150   // initPacketizerState - initialize some internal flags.
initPacketizerState()151   void initPacketizerState() override {
152     ConsideredInstUsesAlreadyWrittenVectorElement = false;
153   }
154 
155   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
ignorePseudoInstruction(const MachineInstr & MI,const MachineBasicBlock * MBB)156   bool ignorePseudoInstruction(const MachineInstr &MI,
157                                const MachineBasicBlock *MBB) override {
158     return false;
159   }
160 
161   // isSoloInstruction - return true if instruction MI can not be packetized
162   // with any other instruction, which means that MI itself is a packet.
isSoloInstruction(const MachineInstr & MI)163   bool isSoloInstruction(const MachineInstr &MI) override {
164     if (TII->isVector(MI))
165       return true;
166     if (!TII->isALUInstr(MI.getOpcode()))
167       return true;
168     if (MI.getOpcode() == R600::GROUP_BARRIER)
169       return true;
170     // XXX: This can be removed once the packetizer properly handles all the
171     // LDS instruction group restrictions.
172     return TII->isLDSInstr(MI.getOpcode());
173   }
174 
175   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
176   // together.
isLegalToPacketizeTogether(SUnit * SUI,SUnit * SUJ)177   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
178     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
179     if (getSlot(*MII) == getSlot(*MIJ))
180       ConsideredInstUsesAlreadyWrittenVectorElement = true;
181     // Does MII and MIJ share the same pred_sel ?
182     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
183         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
184     Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
185       PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
186     if (PredI != PredJ)
187       return false;
188     if (SUJ->isSucc(SUI)) {
189       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
190         const SDep &Dep = SUJ->Succs[i];
191         if (Dep.getSUnit() != SUI)
192           continue;
193         if (Dep.getKind() == SDep::Anti)
194           continue;
195         if (Dep.getKind() == SDep::Output)
196           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
197             continue;
198         return false;
199       }
200     }
201 
202     bool ARDef =
203         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
204     bool ARUse =
205         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
206 
207     return !ARDef || !ARUse;
208   }
209 
210   // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
211   // and SUJ.
isLegalToPruneDependencies(SUnit * SUI,SUnit * SUJ)212   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
213     return false;
214   }
215 
setIsLastBit(MachineInstr * MI,unsigned Bit) const216   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
217     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
218     MI->getOperand(LastOp).setImm(Bit);
219   }
220 
isBundlableWithCurrentPMI(MachineInstr & MI,const DenseMap<unsigned,unsigned> & PV,std::vector<R600InstrInfo::BankSwizzle> & BS,bool & isTransSlot)221   bool isBundlableWithCurrentPMI(MachineInstr &MI,
222                                  const DenseMap<unsigned, unsigned> &PV,
223                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
224                                  bool &isTransSlot) {
225     isTransSlot = TII->isTransOnly(MI);
226     assert (!isTransSlot || VLIW5);
227 
228     // Is the dst reg sequence legal ?
229     if (!isTransSlot && !CurrentPacketMIs.empty()) {
230       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
231         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
232             !TII->isVectorOnly(MI) && VLIW5) {
233           isTransSlot = true;
234           LLVM_DEBUG({
235             dbgs() << "Considering as Trans Inst :";
236             MI.dump();
237           });
238         }
239         else
240           return false;
241       }
242     }
243 
244     // Are the Constants limitations met ?
245     CurrentPacketMIs.push_back(&MI);
246     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
247       LLVM_DEBUG({
248         dbgs() << "Couldn't pack :\n";
249         MI.dump();
250         dbgs() << "with the following packets :\n";
251         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
252           CurrentPacketMIs[i]->dump();
253           dbgs() << "\n";
254         }
255         dbgs() << "because of Consts read limitations\n";
256       });
257       CurrentPacketMIs.pop_back();
258       return false;
259     }
260 
261     // Is there a BankSwizzle set that meet Read Port limitations ?
262     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
263             PV, BS, isTransSlot)) {
264       LLVM_DEBUG({
265         dbgs() << "Couldn't pack :\n";
266         MI.dump();
267         dbgs() << "with the following packets :\n";
268         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
269           CurrentPacketMIs[i]->dump();
270           dbgs() << "\n";
271         }
272         dbgs() << "because of Read port limitations\n";
273       });
274       CurrentPacketMIs.pop_back();
275       return false;
276     }
277 
278     // We cannot read LDS source registers from the Trans slot.
279     if (isTransSlot && TII->readsLDSSrcReg(MI))
280       return false;
281 
282     CurrentPacketMIs.pop_back();
283     return true;
284   }
285 
addToPacket(MachineInstr & MI)286   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
287     MachineBasicBlock::iterator FirstInBundle =
288         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
289     const DenseMap<unsigned, unsigned> &PV =
290         getPreviousVector(FirstInBundle);
291     std::vector<R600InstrInfo::BankSwizzle> BS;
292     bool isTransSlot;
293 
294     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
295       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
296         MachineInstr *MI = CurrentPacketMIs[i];
297         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
298             R600::OpName::bank_swizzle);
299         MI->getOperand(Op).setImm(BS[i]);
300       }
301       unsigned Op =
302           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
303       MI.getOperand(Op).setImm(BS.back());
304       if (!CurrentPacketMIs.empty())
305         setIsLastBit(CurrentPacketMIs.back(), 0);
306       substitutePV(MI, PV);
307       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
308       if (isTransSlot) {
309         endPacket(std::next(It)->getParent(), std::next(It));
310       }
311       return It;
312     }
313     endPacket(MI.getParent(), MI);
314     if (TII->isTransOnly(MI))
315       return MI;
316     return VLIWPacketizerList::addToPacket(MI);
317   }
318 };
319 
runOnMachineFunction(MachineFunction & Fn)320 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
321   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
322   const R600InstrInfo *TII = ST.getInstrInfo();
323 
324   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
325 
326   // Instantiate the packetizer.
327   R600PacketizerList Packetizer(Fn, ST, MLI);
328 
329   // DFA state table should not be empty.
330   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
331   assert(Packetizer.getResourceTracker()->getInstrItins());
332 
333   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
334     return false;
335 
336   //
337   // Loop over all basic blocks and remove KILL pseudo-instructions
338   // These instructions confuse the dependence analysis. Consider:
339   // D0 = ...   (Insn 0)
340   // R0 = KILL R0, D0 (Insn 1)
341   // R0 = ... (Insn 2)
342   // Here, Insn 1 will result in the dependence graph not emitting an output
343   // dependence between Insn 0 and Insn 2. This can lead to incorrect
344   // packetization
345   //
346   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
347        MBB != MBBe; ++MBB) {
348     MachineBasicBlock::iterator End = MBB->end();
349     MachineBasicBlock::iterator MI = MBB->begin();
350     while (MI != End) {
351       if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
352           (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
353         MachineBasicBlock::iterator DeleteMI = MI;
354         ++MI;
355         MBB->erase(DeleteMI);
356         End = MBB->end();
357         continue;
358       }
359       ++MI;
360     }
361   }
362 
363   // Loop over all of the basic blocks.
364   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
365        MBB != MBBe; ++MBB) {
366     // Find scheduling regions and schedule / packetize each region.
367     unsigned RemainingCount = MBB->size();
368     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
369         RegionEnd != MBB->begin();) {
370       // The next region starts above the previous region. Look backward in the
371       // instruction stream until we find the nearest boundary.
372       MachineBasicBlock::iterator I = RegionEnd;
373       for(;I != MBB->begin(); --I, --RemainingCount) {
374         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
375           break;
376       }
377       I = MBB->begin();
378 
379       // Skip empty scheduling regions.
380       if (I == RegionEnd) {
381         RegionEnd = std::prev(RegionEnd);
382         --RemainingCount;
383         continue;
384       }
385       // Skip regions with one instruction.
386       if (I == std::prev(RegionEnd)) {
387         RegionEnd = std::prev(RegionEnd);
388         continue;
389       }
390 
391       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
392       RegionEnd = I;
393     }
394   }
395 
396   return true;
397 
398 }
399 
400 } // end anonymous namespace
401 
// Pass registration boilerplate. The BEGIN/END macro pair names the pass
// class, its argument string (DEBUG_TYPE, i.e. "packets") and its description.
// No INITIALIZE_PASS_DEPENDENCY entries are listed between the two macros.
// NOTE(review): getAnalysisUsage() requires MachineDominatorTree and
// MachineLoopInfo; confirm whether they should be declared as dependencies
// here.
INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
                     "R600 Packetizer", false, false)
INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
                    "R600 Packetizer", false, false)

// Storage for the pass identifier declared inside the class.
char R600Packetizer::ID = 0;

// Exposes the pass identifier under the llvm namespace, since the pass class
// itself lives in an anonymous namespace.
char &llvm::R600PacketizerID = R600Packetizer::ID;
410 
createR600Packetizer()411 llvm::FunctionPass *llvm::createR600Packetizer() {
412   return new R600Packetizer();
413 }
414