1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "AMDGPUSubtarget.h"
42 #include "SIInstrInfo.h"
43 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44 #include "llvm/ADT/DenseMap.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/CodeGen/MachineBasicBlock.h"
48 #include "llvm/CodeGen/MachineFunction.h"
49 #include "llvm/CodeGen/MachineFunctionPass.h"
50 #include "llvm/CodeGen/MachineInstr.h"
51 #include "llvm/CodeGen/MachineInstrBuilder.h"
52 #include "llvm/CodeGen/MachineOperand.h"
53 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/CodeGen/TargetRegisterInfo.h"
55 #include "llvm/Pass.h"
56 #include <cassert>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "gcn-dpp-combine"
61 
62 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
63 
64 namespace {
65 
66 class GCNDPPCombine : public MachineFunctionPass {
67   MachineRegisterInfo *MRI;
68   const SIInstrInfo *TII;
69 
70   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
71 
72   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
73 
74   MachineInstr *createDPPInst(MachineInstr &OrigMI,
75                               MachineInstr &MovMI,
76                               RegSubRegPair CombOldVGPR,
77                               MachineOperand *OldOpnd,
78                               bool CombBCZ) const;
79 
80   MachineInstr *createDPPInst(MachineInstr &OrigMI,
81                               MachineInstr &MovMI,
82                               RegSubRegPair CombOldVGPR,
83                               bool CombBCZ) const;
84 
85   bool hasNoImmOrEqual(MachineInstr &MI,
86                        unsigned OpndName,
87                        int64_t Value,
88                        int64_t Mask = -1) const;
89 
90   bool combineDPPMov(MachineInstr &MI) const;
91 
92 public:
93   static char ID;
94 
GCNDPPCombine()95   GCNDPPCombine() : MachineFunctionPass(ID) {
96     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
97   }
98 
99   bool runOnMachineFunction(MachineFunction &MF) override;
100 
getPassName() const101   StringRef getPassName() const override { return "GCN DPP Combine"; }
102 
getAnalysisUsage(AnalysisUsage & AU) const103   void getAnalysisUsage(AnalysisUsage &AU) const override {
104     AU.setPreservesCFG();
105     MachineFunctionPass::getAnalysisUsage(AU);
106   }
107 
108 private:
109   int getDPPOp(unsigned Op) const;
110 };
111 
112 } // end anonymous namespace
113 
// Generates initializeGCNDPPCombinePass(), which the pass constructor calls.
INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

// Definition of the static pass identifier declared inside the class.
char GCNDPPCombine::ID = 0;

// Reference to the pass identifier, defined in the llvm namespace.
char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
119 
createGCNDPPCombinePass()120 FunctionPass *llvm::createGCNDPPCombinePass() {
121   return new GCNDPPCombine();
122 }
123 
getDPPOp(unsigned Op) const124 int GCNDPPCombine::getDPPOp(unsigned Op) const {
125   auto DPP32 = AMDGPU::getDPPOp32(Op);
126   if (DPP32 == -1) {
127     auto E32 = AMDGPU::getVOPe32(Op);
128     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
129   }
130   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
131 }
132 
133 // tracks the register operand definition and returns:
134 //   1. immediate operand used to initialize the register if found
135 //   2. nullptr if the register operand is undef
136 //   3. the operand itself otherwise
getOldOpndValue(MachineOperand & OldOpnd) const137 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
138   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
139   if (!Def)
140     return nullptr;
141 
142   switch(Def->getOpcode()) {
143   default: break;
144   case AMDGPU::IMPLICIT_DEF:
145     return nullptr;
146   case AMDGPU::COPY:
147   case AMDGPU::V_MOV_B32_e32: {
148     auto &Op1 = Def->getOperand(1);
149     if (Op1.isImm())
150       return &Op1;
151     break;
152   }
153   }
154   return &OldOpnd;
155 }
156 
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,bool CombBCZ) const157 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
158                                            MachineInstr &MovMI,
159                                            RegSubRegPair CombOldVGPR,
160                                            bool CombBCZ) const {
161   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
162 
163   auto OrigOp = OrigMI.getOpcode();
164   auto DPPOp = getDPPOp(OrigOp);
165   if (DPPOp == -1) {
166     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
167     return nullptr;
168   }
169 
170   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
171                          OrigMI.getDebugLoc(), TII->get(DPPOp));
172   bool Fail = false;
173   do {
174     auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
175     assert(Dst);
176     DPPInst.add(*Dst);
177     int NumOperands = 1;
178 
179     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
180     if (OldIdx != -1) {
181       assert(OldIdx == NumOperands);
182       assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
183       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
184       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
185                      CombOldVGPR.SubReg);
186       ++NumOperands;
187     } else {
188       // TODO: this discards MAC/FMA instructions for now, let's add it later
189       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
190                            " TBD\n");
191       Fail = true;
192       break;
193     }
194 
195     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
196                                           AMDGPU::OpName::src0_modifiers)) {
197       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
198                                           AMDGPU::OpName::src0_modifiers));
199       assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
200       DPPInst.addImm(Mod0->getImm());
201       ++NumOperands;
202     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
203                    AMDGPU::OpName::src0_modifiers) != -1) {
204       DPPInst.addImm(0);
205       ++NumOperands;
206     }
207     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
208     assert(Src0);
209     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
210       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
211       Fail = true;
212       break;
213     }
214     DPPInst.add(*Src0);
215     DPPInst->getOperand(NumOperands).setIsKill(false);
216     ++NumOperands;
217 
218     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
219                                           AMDGPU::OpName::src1_modifiers)) {
220       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
221                                           AMDGPU::OpName::src1_modifiers));
222       assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
223       DPPInst.addImm(Mod1->getImm());
224       ++NumOperands;
225     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
226                    AMDGPU::OpName::src1_modifiers) != -1) {
227       DPPInst.addImm(0);
228       ++NumOperands;
229     }
230     if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
231       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
232         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
233         Fail = true;
234         break;
235       }
236       DPPInst.add(*Src1);
237       ++NumOperands;
238     }
239 
240     if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
241       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
242           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
243         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
244         Fail = true;
245         break;
246       }
247       DPPInst.add(*Src2);
248     }
249 
250     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
251     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
252     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
253     DPPInst.addImm(CombBCZ ? 1 : 0);
254   } while (false);
255 
256   if (Fail) {
257     DPPInst.getInstr()->eraseFromParent();
258     return nullptr;
259   }
260   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
261   return DPPInst.getInstr();
262 }
263 
isIdentityValue(unsigned OrigMIOp,MachineOperand * OldOpnd)264 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
265   assert(OldOpnd->isImm());
266   switch (OrigMIOp) {
267   default: break;
268   case AMDGPU::V_ADD_U32_e32:
269   case AMDGPU::V_ADD_U32_e64:
270   case AMDGPU::V_ADD_I32_e32:
271   case AMDGPU::V_ADD_I32_e64:
272   case AMDGPU::V_OR_B32_e32:
273   case AMDGPU::V_OR_B32_e64:
274   case AMDGPU::V_SUBREV_U32_e32:
275   case AMDGPU::V_SUBREV_U32_e64:
276   case AMDGPU::V_SUBREV_I32_e32:
277   case AMDGPU::V_SUBREV_I32_e64:
278   case AMDGPU::V_MAX_U32_e32:
279   case AMDGPU::V_MAX_U32_e64:
280   case AMDGPU::V_XOR_B32_e32:
281   case AMDGPU::V_XOR_B32_e64:
282     if (OldOpnd->getImm() == 0)
283       return true;
284     break;
285   case AMDGPU::V_AND_B32_e32:
286   case AMDGPU::V_AND_B32_e64:
287   case AMDGPU::V_MIN_U32_e32:
288   case AMDGPU::V_MIN_U32_e64:
289     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
290         std::numeric_limits<uint32_t>::max())
291       return true;
292     break;
293   case AMDGPU::V_MIN_I32_e32:
294   case AMDGPU::V_MIN_I32_e64:
295     if (static_cast<int32_t>(OldOpnd->getImm()) ==
296         std::numeric_limits<int32_t>::max())
297       return true;
298     break;
299   case AMDGPU::V_MAX_I32_e32:
300   case AMDGPU::V_MAX_I32_e64:
301     if (static_cast<int32_t>(OldOpnd->getImm()) ==
302         std::numeric_limits<int32_t>::min())
303       return true;
304     break;
305   case AMDGPU::V_MUL_I32_I24_e32:
306   case AMDGPU::V_MUL_I32_I24_e64:
307   case AMDGPU::V_MUL_U32_U24_e32:
308   case AMDGPU::V_MUL_U32_U24_e64:
309     if (OldOpnd->getImm() == 1)
310       return true;
311     break;
312   }
313   return false;
314 }
315 
createDPPInst(MachineInstr & OrigMI,MachineInstr & MovMI,RegSubRegPair CombOldVGPR,MachineOperand * OldOpndValue,bool CombBCZ) const316 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
317                                            MachineInstr &MovMI,
318                                            RegSubRegPair CombOldVGPR,
319                                            MachineOperand *OldOpndValue,
320                                            bool CombBCZ) const {
321   assert(CombOldVGPR.Reg);
322   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
323     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
324     if (!Src1 || !Src1->isReg()) {
325       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
326       return nullptr;
327     }
328     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
329       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
330       return nullptr;
331     }
332     CombOldVGPR = getRegSubRegPair(*Src1);
333     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
334       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
335       return nullptr;
336     }
337   }
338   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
339 }
340 
341 // returns true if MI doesn't have OpndName immediate operand or the
342 // operand has Value
hasNoImmOrEqual(MachineInstr & MI,unsigned OpndName,int64_t Value,int64_t Mask) const343 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
344                                     int64_t Value, int64_t Mask) const {
345   auto *Imm = TII->getNamedOperand(MI, OpndName);
346   if (!Imm)
347     return true;
348 
349   assert(Imm->isImm());
350   return (Imm->getImm() & Mask) == Value;
351 }
352 
combineDPPMov(MachineInstr & MovMI) const353 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
354   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
355   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
356 
357   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
358   assert(DstOpnd && DstOpnd->isReg());
359   auto DPPMovReg = DstOpnd->getReg();
360   if (DPPMovReg.isPhysical()) {
361     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
362     return false;
363   }
364   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
365     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
366                          " for all uses\n");
367     return false;
368   }
369 
370   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
371   assert(RowMaskOpnd && RowMaskOpnd->isImm());
372   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
373   assert(BankMaskOpnd && BankMaskOpnd->isImm());
374   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
375                             BankMaskOpnd->getImm() == 0xF;
376 
377   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
378   assert(BCZOpnd && BCZOpnd->isImm());
379   bool BoundCtrlZero = BCZOpnd->getImm();
380 
381   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
382   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
383   assert(OldOpnd && OldOpnd->isReg());
384   assert(SrcOpnd && SrcOpnd->isReg());
385   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
386     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
387     return false;
388   }
389 
390   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
391   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
392   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
393   // but the third option is used to distinguish undef from non-immediate
394   // to reuse IMPLICIT_DEF instruction later
395   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
396 
397   bool CombBCZ = false;
398 
399   if (MaskAllLanes && BoundCtrlZero) { // [1]
400     CombBCZ = true;
401   } else {
402     if (!OldOpndValue || !OldOpndValue->isImm()) {
403       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
404       return false;
405     }
406 
407     if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
408       LLVM_DEBUG(dbgs() <<
409         "  failed: old reg def and mov should be in the same BB\n");
410       return false;
411     }
412 
413     if (OldOpndValue->getImm() == 0) {
414       if (MaskAllLanes) {
415         assert(!BoundCtrlZero); // by check [1]
416         CombBCZ = true;
417       }
418     } else if (BoundCtrlZero) {
419       assert(!MaskAllLanes); // by check [1]
420       LLVM_DEBUG(dbgs() <<
421         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
422       return false;
423     }
424   }
425 
426   LLVM_DEBUG(dbgs() << "  old=";
427     if (!OldOpndValue)
428       dbgs() << "undef";
429     else
430       dbgs() << *OldOpndValue;
431     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
432 
433   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
434   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
435   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
436   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
437   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
438     CombOldVGPR = RegSubRegPair(
439       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
440     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
441                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
442     DPPMIs.push_back(UndefInst.getInstr());
443   }
444 
445   OrigMIs.push_back(&MovMI);
446   bool Rollback = true;
447   SmallVector<MachineOperand*, 16> Uses;
448 
449   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
450     Uses.push_back(&Use);
451   }
452 
453   while (!Uses.empty()) {
454     MachineOperand *Use = Uses.pop_back_val();
455     Rollback = true;
456 
457     auto &OrigMI = *Use->getParent();
458     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
459 
460     auto OrigOp = OrigMI.getOpcode();
461     if (OrigOp == AMDGPU::REG_SEQUENCE) {
462       Register FwdReg = OrigMI.getOperand(0).getReg();
463       unsigned FwdSubReg = 0;
464 
465       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
466         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
467                              " for all uses\n");
468         break;
469       }
470 
471       unsigned OpNo, E = OrigMI.getNumOperands();
472       for (OpNo = 1; OpNo < E; OpNo += 2) {
473         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
474           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
475           break;
476         }
477       }
478 
479       if (!FwdSubReg)
480         break;
481 
482       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
483         if (Op.getSubReg() == FwdSubReg)
484           Uses.push_back(&Op);
485       }
486       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
487       continue;
488     }
489 
490     if (TII->isVOP3(OrigOp)) {
491       if (!TII->hasVALU32BitEncoding(OrigOp)) {
492         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
493         break;
494       }
495       // check if other than abs|neg modifiers are set (opsel for example)
496       const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
497       if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
498           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
499           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
500           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
501         LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
502         break;
503       }
504     } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
505       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
506       break;
507     }
508 
509     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
510     if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
511       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
512                                         OldOpndValue, CombBCZ)) {
513         DPPMIs.push_back(DPPInst);
514         Rollback = false;
515       }
516     } else if (OrigMI.isCommutable() &&
517                Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
518       auto *BB = OrigMI.getParent();
519       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
520       BB->insert(OrigMI, NewMI);
521       if (TII->commuteInstruction(*NewMI)) {
522         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
523         if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
524                                           OldOpndValue, CombBCZ)) {
525           DPPMIs.push_back(DPPInst);
526           Rollback = false;
527         }
528       } else
529         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
530       NewMI->eraseFromParent();
531     } else
532       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
533     if (Rollback)
534       break;
535     OrigMIs.push_back(&OrigMI);
536   }
537 
538   Rollback |= !Uses.empty();
539 
540   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
541     MI->eraseFromParent();
542 
543   if (!Rollback) {
544     for (auto &S : RegSeqWithOpNos) {
545       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
546         S.first->eraseFromParent();
547         continue;
548       }
549       while (!S.second.empty())
550         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
551     }
552   }
553 
554   return !Rollback;
555 }
556 
runOnMachineFunction(MachineFunction & MF)557 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
558   auto &ST = MF.getSubtarget<GCNSubtarget>();
559   if (!ST.hasDPP() || skipFunction(MF.getFunction()))
560     return false;
561 
562   MRI = &MF.getRegInfo();
563   TII = ST.getInstrInfo();
564 
565   assert(MRI->isSSA() && "Must be run on SSA");
566 
567   bool Changed = false;
568   for (auto &MBB : MF) {
569     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
570       auto &MI = *I++;
571       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
572         Changed = true;
573         ++NumDPPMovsCombined;
574       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
575         auto Split = TII->expandMovDPP64(MI);
576         for (auto M : { Split.first, Split.second }) {
577           if (combineDPPMov(*M))
578             ++NumDPPMovsCombined;
579         }
580         Changed = true;
581       }
582     }
583   }
584   return Changed;
585 }
586