1 //===- HexagonVExtract.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass will replace multiple occurrences of V6_extractw from the same
9 // vector register with a combination of a vector store and scalar loads.
10 //===----------------------------------------------------------------------===//
11 
12 #include "Hexagon.h"
13 #include "HexagonInstrInfo.h"
14 #include "HexagonMachineFunctionInfo.h"
15 #include "HexagonRegisterInfo.h"
16 #include "HexagonSubtarget.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Pass.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 
26 #include <map>
27 
28 using namespace llvm;
29 
30 static cl::opt<unsigned> VExtractThreshold(
31     "hexagon-vextract-threshold", cl::Hidden, cl::init(1),
32     cl::desc("Threshold for triggering vextract replacement"));
33 
34 namespace llvm {
35   void initializeHexagonVExtractPass(PassRegistry& Registry);
36   FunctionPass *createHexagonVExtract();
37 }
38 
39 namespace {
40   class HexagonVExtract : public MachineFunctionPass {
41   public:
42     static char ID;
43     HexagonVExtract() : MachineFunctionPass(ID) {}
44 
45     StringRef getPassName() const override {
46       return "Hexagon optimize vextract";
47     }
48     void getAnalysisUsage(AnalysisUsage &AU) const override {
49       MachineFunctionPass::getAnalysisUsage(AU);
50     }
51     bool runOnMachineFunction(MachineFunction &MF) override;
52 
53   private:
54     const HexagonSubtarget *HST = nullptr;
55     const HexagonInstrInfo *HII = nullptr;
56 
57     unsigned genElemLoad(MachineInstr *ExtI, unsigned BaseR,
58                          MachineRegisterInfo &MRI);
59   };
60 
61   char HexagonVExtract::ID = 0;
62 }
63 
// Register HexagonVExtract with the pass registry under the argument name
// "hexagon-vextract" and the description "Hexagon optimize vextract".
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is-analysis" flags of INITIALIZE_PASS -- confirm against
// the macro definition.
INITIALIZE_PASS(HexagonVExtract, "hexagon-vextract",
  "Hexagon optimize vextract", false, false)
66 
67 unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR,
68                                       MachineRegisterInfo &MRI) {
69   MachineBasicBlock &ExtB = *ExtI->getParent();
70   DebugLoc DL = ExtI->getDebugLoc();
71   Register ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
72 
73   Register ExtIdxR = ExtI->getOperand(2).getReg();
74   unsigned ExtIdxS = ExtI->getOperand(2).getSubReg();
75 
76   // Simplified check for a compile-time constant value of ExtIdxR.
77   if (ExtIdxS == 0) {
78     MachineInstr *DI = MRI.getVRegDef(ExtIdxR);
79     if (DI->getOpcode() == Hexagon::A2_tfrsi) {
80       unsigned V = DI->getOperand(1).getImm();
81       V &= (HST->getVectorLength()-1) & -4u;
82 
83       BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L2_loadri_io), ElemR)
84         .addReg(BaseR)
85         .addImm(V);
86       return ElemR;
87     }
88   }
89 
90   Register IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
91   BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR)
92     .add(ExtI->getOperand(2))
93     .addImm(-4);
94   BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L4_loadri_rr), ElemR)
95     .addReg(BaseR)
96     .addReg(IdxR)
97     .addImm(0);
98   return ElemR;
99 }
100 
101 bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
102   HST = &MF.getSubtarget<HexagonSubtarget>();
103   HII = HST->getInstrInfo();
104   const auto &HRI = *HST->getRegisterInfo();
105   MachineRegisterInfo &MRI = MF.getRegInfo();
106   MachineFrameInfo &MFI = MF.getFrameInfo();
107   Register AR =
108       MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseVReg();
109   std::map<unsigned, SmallVector<MachineInstr *, 4>> VExtractMap;
110   bool Changed = false;
111 
112   for (MachineBasicBlock &MBB : MF) {
113     for (MachineInstr &MI : MBB) {
114       unsigned Opc = MI.getOpcode();
115       if (Opc != Hexagon::V6_extractw)
116         continue;
117       Register VecR = MI.getOperand(1).getReg();
118       VExtractMap[VecR].push_back(&MI);
119     }
120   }
121 
122   auto EmitAddr = [&] (MachineBasicBlock &BB, MachineBasicBlock::iterator At,
123                        DebugLoc dl, int FI, unsigned Offset) {
124     Register AddrR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
125     unsigned FiOpc = AR != 0 ? Hexagon::PS_fia : Hexagon::PS_fi;
126     auto MIB = BuildMI(BB, At, dl, HII->get(FiOpc), AddrR);
127     if (AR)
128       MIB.addReg(AR);
129     MIB.addFrameIndex(FI).addImm(Offset);
130     return AddrR;
131   };
132 
133   MaybeAlign MaxAlign;
134   for (auto &P : VExtractMap) {
135     unsigned VecR = P.first;
136     if (P.second.size() <= VExtractThreshold)
137       continue;
138 
139     const auto &VecRC = *MRI.getRegClass(VecR);
140     Align Alignment = HRI.getSpillAlign(VecRC);
141     MaxAlign = std::max(MaxAlign.valueOrOne(), Alignment);
142     // Make sure this is not a spill slot: spill slots cannot be aligned
143     // if there are variable-sized objects on the stack. They must be
144     // accessible via FP (which is not aligned), because SP is unknown,
145     // and AP may not be available at the location of the load/store.
146     int FI = MFI.CreateStackObject(HRI.getSpillSize(VecRC), Alignment,
147                                    /*isSpillSlot*/ false);
148 
149     MachineInstr *DefI = MRI.getVRegDef(VecR);
150     MachineBasicBlock::iterator At = std::next(DefI->getIterator());
151     MachineBasicBlock &DefB = *DefI->getParent();
152     unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
153                           ? Hexagon::V6_vS32b_ai
154                           : Hexagon::PS_vstorerw_ai;
155     Register AddrR = EmitAddr(DefB, At, DefI->getDebugLoc(), FI, 0);
156     BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
157       .addReg(AddrR)
158       .addImm(0)
159       .addReg(VecR);
160 
161     unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8;
162 
163     for (MachineInstr *ExtI : P.second) {
164       assert(ExtI->getOpcode() == Hexagon::V6_extractw);
165       unsigned SR = ExtI->getOperand(1).getSubReg();
166       assert(ExtI->getOperand(1).getReg() == VecR);
167 
168       MachineBasicBlock &ExtB = *ExtI->getParent();
169       DebugLoc DL = ExtI->getDebugLoc();
170       Register BaseR = EmitAddr(ExtB, ExtI, ExtI->getDebugLoc(), FI,
171                                 SR == 0 ? 0 : VecSize/2);
172 
173       unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
174       Register ExtR = ExtI->getOperand(0).getReg();
175       MRI.replaceRegWith(ExtR, ElemR);
176       ExtB.erase(ExtI);
177       Changed = true;
178     }
179   }
180 
181   if (AR && MaxAlign) {
182     // Update the required stack alignment.
183     MachineInstr *AlignaI = MRI.getVRegDef(AR);
184     assert(AlignaI->getOpcode() == Hexagon::PS_aligna);
185     MachineOperand &Op = AlignaI->getOperand(1);
186     if (*MaxAlign > Op.getImm())
187       Op.setImm(MaxAlign->value());
188   }
189 
190   return Changed;
191 }
192 
193 FunctionPass *llvm::createHexagonVExtract() {
194   return new HexagonVExtract();
195 }
196