//===- GCNDPPCombine.cpp - optimization for DPP instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules:
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
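//
// e.g. 0 is the identity for V_ADD_U32, so with bound_ctrl off and $old==0
//
//   $dpp_value = V_MOV_B32_dpp $old, $vgpr, dpp_controls...
//   $res = V_ADD_U32_e32 $dpp_value, $src1
//
// becomes a single V_ADD_U32_dpp whose old operand is $src1.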
//
// Otherwise the combine is cancelled and the original sequence is kept.
//
// The mov_dpp instruction should reside in the same BB as all its uses.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "gcn-dpp-combine"

STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");

namespace {

class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;

  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpnd,
                              bool CombBCZ) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              bool CombBCZ) const;

  bool hasNoImmOrEqual(MachineInstr &MI,
                       unsigned OpndName,
                       int64_t Value,
                       int64_t Mask = -1) const;

  bool combineDPPMov(MachineInstr &MI) const;

public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  int getDPPOp(unsigned Op) const;
};

} // end anonymous namespace

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

char GCNDPPCombine::ID = 0;

char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;

FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}

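// Returns the DPP variant of Op, or -1 if none exists. If Op has no direct
// DPP form, its shrunk VOP e32 equivalent is tried instead; the result must
// also map to a real MC opcode on the current subtarget.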
int GCNDPPCombine::getDPPOp(unsigned Op) const {
  auto DPP32 = AMDGPU::getDPPOp32(Op);
  if (DPP32 == -1) {
    auto E32 = AMDGPU::getVOPe32(Op);
    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
  }
  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
}

// Tracks the register operand's definition and returns:
// 1. the immediate operand used to initialize the register, if found;
// 2. nullptr if the register operand is undef;
// 3. the operand itself otherwise.
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!Def)
    return nullptr;

  switch (Def->getOpcode()) {
  default: break;
  case AMDGPU::IMPLICIT_DEF:
    return nullptr;
  case AMDGPU::COPY:
  case AMDGPU::V_MOV_B32_e32: {
    auto &Op1 = Def->getOperand(1);
    if (Op1.isImm())
      return &Op1;
    break;
  }
  }
  return &OldOpnd;
}

MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);

  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
    return nullptr;
  }

  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                         OrigMI.getDebugLoc(), TII->get(DPPOp));
  bool Fail = false;
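  // The do {} while (false) below is an early-exit idiom: each failure path
  // breaks out of the block so all of them share the single cleanup of the
  // half-built instruction after it.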
  do {
    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
    assert(Dst);
    DPPInst.add(*Dst);
    int NumOperands = 1;

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else {
      // TODO: this discards MAC/FMA instructions for now, let's add it later
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }

    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src0_modifiers));
      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                                  AMDGPU::OpName::src0_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
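    // src0 is taken from the mov, not from OrigMI: it is the value read from
    // the neighboring lane selected by the DPP control.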
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;

    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src1_modifiers));
      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                                  AMDGPU::OpName::src1_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }

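    // A src2 operand of the original instruction can only be kept if the DPP
    // form has a src2 slot at all and the operand is legal in that position.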
    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
    }

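    // The DPP control operands are copied verbatim from the mov; only
    // bound_ctrl is replaced by the combined value computed by the caller.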
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "  combined: " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}

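// Returns true if the immediate in OldOpnd is the identity element of the
// binary VALU op OrigMIOp, i.e. OrigMIOp(Imm, x) == x for every x (with Imm
// in the src0 position).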
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  assert(OldOpnd->isImm());
  switch (OrigMIOp) {
  default: break;
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_I32_e32:
  case AMDGPU::V_ADD_I32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    if (OldOpnd->getImm() == 0)
      return true;
    break;
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<uint32_t>::max())
      return true;
    break;
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::max())
      return true;
    break;
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::min())
      return true;
    break;
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    if (OldOpnd->getImm() == 1)
      return true;
    break;
  }
  return false;
}

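// Wrapper around the 4-operand createDPPInst for the identity-old case (rule
// 2 in the file header): when bound_ctrl is off and the old value is a known
// immediate, it must be the identity of the VALU op, and src1 is then reused
// as the combined old operand.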
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           MachineOperand *OldOpndValue,
                                           bool CombBCZ) const {
  assert(CombOldVGPR.Reg);
  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (!Src1 || !Src1->isReg()) {
      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
      return nullptr;
    }
    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
      return nullptr;
    }
    CombOldVGPR = getRegSubRegPair(*Src1);
    if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
      LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
      return nullptr;
    }
  }
  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
}

// Returns true if MI has no immediate operand named OpndName, or if that
// operand, masked with Mask, equals Value.
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
                                    int64_t Value, int64_t Mask) const {
  auto *Imm = TII->getNamedOperand(MI, OpndName);
  if (!Imm)
    return true;

  assert(Imm->isImm());
  return (Imm->getImm() & Mask) == Value;
}

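// Tries to rewrite every VALU use of MovMI (a V_MOV_B32_dpp) into a DPP
// instruction. On success the mov and the original uses are erased and true
// is returned; on failure any DPP instructions created so far are erased
// instead and the original code is left untouched.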
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                         " for all uses\n");
    return false;
  }

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
                            BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
    return false;
  }

  auto *const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF), an immediate, or something
  // else. We could use assert(!OldOpndValue || OldOpndValue->isImm()), but
  // the third option is kept to distinguish undef from a non-immediate so
  // that the IMPLICIT_DEF instruction can be reused later.
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
      LLVM_DEBUG(dbgs() <<
        "  failed: old reg def and mov should be in the same BB\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        "  failed: old!=0 with bound_ctrl:0 and not all lanes enabled"
        " isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
    if (!OldOpndValue)
      dbgs() << "undef";
    else
      dbgs() << *OldOpndValue;
    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');

  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
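  // RegSeqWithOpNos records, per REG_SEQUENCE user, the operand indices that
  // forward the DPP value, so those operands can be fixed up (or the whole
  // REG_SEQUENCE erased) once the combine succeeds.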
  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // Try to reuse the previous old reg if it's undefined (IMPLICIT_DEF).
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    CombOldVGPR = RegSubRegPair(
      MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand*, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }
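
  // Process the uses as a worklist: REG_SEQUENCE users merely forward the
  // DPP value, so the uses of their results are pushed back onto the
  // worklist and combined in turn.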
  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                             " for all uses\n");
        break;
      }

      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }

    if (TII->isVOP3(OrigOp)) {
      if (!TII->hasVALU32BitEncoding(OrigOp)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has no e32 equivalent\n");
        break;
      }
      // Check whether modifiers other than abs|neg are set (opsel, for
      // example).
      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
        break;
      }
    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
      break;
    }

    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
    if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else if (OrigMI.isCommutable() &&
               Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        LLVM_DEBUG(dbgs() << "  commuted: " << *NewMI);
        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
                                          OldOpndValue, CombBCZ)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    } else
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

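  // If the worklist wasn't drained, the loop above bailed out on some use;
  // that forces a rollback even if earlier uses combined successfully.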
  Rollback |= !Uses.empty();

  for (auto *MI : *(Rollback ? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
    }
  }

  return !Rollback;
}

bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  auto &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST.getInstrInfo();

  assert(MRI->isSSA() && "Must be run on SSA");

  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
      auto &MI = *I++;
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
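        // Split the 64-bit DPP mov into two 32-bit halves and try to combine
        // each half separately.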
        auto Split = TII->expandMovDPP64(MI);
        for (auto M : { Split.first, Split.second }) {
          if (combineDPPMov(*M))
            ++NumDPPMovsCombined;
        }
        Changed = true;
      }
    }
  }
  return Changed;
}