//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction:
11 //
12 //   mov %SPL, %depot
13 //   cvta.local %SP, %SPL
14 //
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
// and loads on this address with their .local versions, but this may introduce
// a lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
24 //    %0 = LEA_ADDRi64 %VRFrame, 4
25 //    %1 = cvta_to_local_yes_64 %0
26 //
27 // into
28 //    %1 = LEA_ADDRi64 %VRFrameLocal, 4
29 //
30 // %VRFrameLocal is the virtual register name of %SPL
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "NVPTX.h"
35 #include "llvm/CodeGen/MachineFunctionPass.h"
36 #include "llvm/CodeGen/MachineInstrBuilder.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "nvptx-peephole"
44 
45 namespace llvm {
46 void initializeNVPTXPeepholePass(PassRegistry &);
47 }
48 
49 namespace {
50 struct NVPTXPeephole : public MachineFunctionPass {
51  public:
52   static char ID;
NVPTXPeephole__anonc83d20600111::NVPTXPeephole53   NVPTXPeephole() : MachineFunctionPass(ID) {
54     initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
55   }
56 
57   bool runOnMachineFunction(MachineFunction &MF) override;
58 
getPassName__anonc83d20600111::NVPTXPeephole59   StringRef getPassName() const override {
60     return "NVPTX optimize redundant cvta.to.local instruction";
61   }
62 
getAnalysisUsage__anonc83d20600111::NVPTXPeephole63   void getAnalysisUsage(AnalysisUsage &AU) const override {
64     MachineFunctionPass::getAnalysisUsage(AU);
65   }
66 };
67 }
68 
69 char NVPTXPeephole::ID = 0;
70 
71 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
72 
isCVTAToLocalCombinationCandidate(MachineInstr & Root)73 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
74   auto &MBB = *Root.getParent();
75   auto &MF = *MBB.getParent();
76   // Check current instruction is cvta.to.local
77   if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
78       Root.getOpcode() != NVPTX::cvta_to_local_yes)
79     return false;
80 
81   auto &Op = Root.getOperand(1);
82   const auto &MRI = MF.getRegInfo();
83   MachineInstr *GenericAddrDef = nullptr;
84   if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
85     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
86   }
87 
88   // Check the register operand is uniquely defined by LEA_ADDRi instruction
89   if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
90       (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
91        GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
92     return false;
93   }
94 
95   // Check the LEA_ADDRi operand is Frame index
96   auto &BaseAddrOp = GenericAddrDef->getOperand(1);
97   if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
98     return true;
99   }
100 
101   return false;
102 }
103 
CombineCVTAToLocal(MachineInstr & Root)104 static void CombineCVTAToLocal(MachineInstr &Root) {
105   auto &MBB = *Root.getParent();
106   auto &MF = *MBB.getParent();
107   const auto &MRI = MF.getRegInfo();
108   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
109   auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
110 
111   MachineInstrBuilder MIB =
112       BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
113               Root.getOperand(0).getReg())
114           .addReg(NVPTX::VRFrameLocal)
115           .add(Prev.getOperand(2));
116 
117   MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
118 
119   // Check if MRI has only one non dbg use, which is Root
120   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
121     Prev.eraseFromParentAndMarkDBGValuesForRemoval();
122   }
123   Root.eraseFromParentAndMarkDBGValuesForRemoval();
124 }
125 
runOnMachineFunction(MachineFunction & MF)126 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
127   if (skipFunction(MF.getFunction()))
128     return false;
129 
130   bool Changed = false;
131   // Loop over all of the basic blocks.
132   for (auto &MBB : MF) {
133     // Traverse the basic block.
134     auto BlockIter = MBB.begin();
135 
136     while (BlockIter != MBB.end()) {
137       auto &MI = *BlockIter++;
138       if (isCVTAToLocalCombinationCandidate(MI)) {
139         CombineCVTAToLocal(MI);
140         Changed = true;
141       }
142     }  // Instruction
143   }    // Basic Block
144 
145   // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
146   const auto &MRI = MF.getRegInfo();
147   if (MRI.use_empty(NVPTX::VRFrame)) {
148     if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
149       MI->eraseFromParentAndMarkDBGValuesForRemoval();
150     }
151   }
152 
153   return Changed;
154 }
155 
createNVPTXPeephole()156 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
157