1 //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Simple pass to combine memory and ALU operations
9 //
10 // The Lanai ISA supports instructions where a load/store modifies the base
11 // register used in the load/store operation. This pass finds suitable
12 // load/store and ALU instructions and combines them into one instruction.
13 //
14 // For example,
15 //   ld [ %r6 -- ], %r12
16 // is a supported instruction that is not currently generated by the instruction
17 // selection pass of this backend. This pass generates these instructions by
18 // merging
19 //   add %r6, -4, %r6
20 // followed by
21 //   ld [ %r6 ], %r12
22 // in the same machine basic block into one machine instruction.
23 //===----------------------------------------------------------------------===//
24 
25 #include "LanaiAluCode.h"
26 #include "LanaiTargetMachine.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/RegisterScavenging.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/Support/CommandLine.h"
33 using namespace llvm;
34 
35 #define GET_INSTRMAP_INFO
36 #include "LanaiGenInstrInfo.inc"
37 
38 #define DEBUG_TYPE "lanai-mem-alu-combiner"
39 
40 STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined");
41 
42 static llvm::cl::opt<bool> DisableMemAluCombiner(
43     "disable-lanai-mem-alu-combiner", llvm::cl::init(false),
44     llvm::cl::desc("Do not combine ALU and memory operators"),
45     llvm::cl::Hidden);
46 
47 namespace llvm {
48 void initializeLanaiMemAluCombinerPass(PassRegistry &);
49 } // namespace llvm
50 
51 namespace {
52 typedef MachineBasicBlock::iterator MbbIterator;
53 typedef MachineFunction::iterator MfIterator;
54 
55 class LanaiMemAluCombiner : public MachineFunctionPass {
56 public:
57   static char ID;
LanaiMemAluCombiner()58   explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {
59     initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry());
60   }
61 
getPassName() const62   StringRef getPassName() const override {
63     return "Lanai load / store optimization pass";
64   }
65 
66   bool runOnMachineFunction(MachineFunction &F) override;
67 
getRequiredProperties() const68   MachineFunctionProperties getRequiredProperties() const override {
69     return MachineFunctionProperties().set(
70         MachineFunctionProperties::Property::NoVRegs);
71   }
72 
73 private:
74   MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB,
75                                           const MbbIterator &MemInstr,
76                                           bool Decrement);
77   void insertMergedInstruction(MachineBasicBlock *BB,
78                                const MbbIterator &MemInstr,
79                                const MbbIterator &AluInstr, bool Before);
80   bool combineMemAluInBasicBlock(MachineBasicBlock *BB);
81 
82   // Target machine description which we query for register names, data
83   // layout, etc.
84   const TargetInstrInfo *TII;
85 };
86 } // namespace
87 
88 char LanaiMemAluCombiner::ID = 0;
89 
90 INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE,
91                 "Lanai memory ALU combiner pass", false, false)
92 
93 namespace {
isSpls(uint16_t Opcode)94 bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; }
95 
96 // Determine the opcode for the merged instruction created by considering the
97 // old memory operation's opcode and whether the merged opcode will have an
98 // immediate offset.
mergedOpcode(unsigned OldOpcode,bool ImmediateOffset)99 unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) {
100   switch (OldOpcode) {
101   case Lanai::LDW_RI:
102   case Lanai::LDW_RR:
103     if (ImmediateOffset)
104       return Lanai::LDW_RI;
105     return Lanai::LDW_RR;
106   case Lanai::LDHs_RI:
107   case Lanai::LDHs_RR:
108     if (ImmediateOffset)
109       return Lanai::LDHs_RI;
110     return Lanai::LDHs_RR;
111   case Lanai::LDHz_RI:
112   case Lanai::LDHz_RR:
113     if (ImmediateOffset)
114       return Lanai::LDHz_RI;
115     return Lanai::LDHz_RR;
116   case Lanai::LDBs_RI:
117   case Lanai::LDBs_RR:
118     if (ImmediateOffset)
119       return Lanai::LDBs_RI;
120     return Lanai::LDBs_RR;
121   case Lanai::LDBz_RI:
122   case Lanai::LDBz_RR:
123     if (ImmediateOffset)
124       return Lanai::LDBz_RI;
125     return Lanai::LDBz_RR;
126   case Lanai::SW_RI:
127   case Lanai::SW_RR:
128     if (ImmediateOffset)
129       return Lanai::SW_RI;
130     return Lanai::SW_RR;
131   case Lanai::STB_RI:
132   case Lanai::STB_RR:
133     if (ImmediateOffset)
134       return Lanai::STB_RI;
135     return Lanai::STB_RR;
136   case Lanai::STH_RI:
137   case Lanai::STH_RR:
138     if (ImmediateOffset)
139       return Lanai::STH_RI;
140     return Lanai::STH_RR;
141   default:
142     return 0;
143   }
144 }
145 
146 // Check if the machine instruction has non-volatile memory operands of the type
147 // supported for combining with ALU instructions.
isNonVolatileMemoryOp(const MachineInstr & MI)148 bool isNonVolatileMemoryOp(const MachineInstr &MI) {
149   if (!MI.hasOneMemOperand())
150     return false;
151 
152   // Determine if the machine instruction is a supported memory operation by
153   // testing if the computed merge opcode is a valid memory operation opcode.
154   if (mergedOpcode(MI.getOpcode(), false) == 0)
155     return false;
156 
157   const MachineMemOperand *MemOperand = *MI.memoperands_begin();
158 
159   // Don't move volatile memory accesses
160   // TODO: unclear if we need to be as conservative about atomics
161   if (MemOperand->isVolatile() || MemOperand->isAtomic())
162     return false;
163 
164   return true;
165 }
166 
167 // Test to see if two machine operands are of the same type. This test is less
168 // strict than the MachineOperand::isIdenticalTo function.
isSameOperand(const MachineOperand & Op1,const MachineOperand & Op2)169 bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) {
170   if (Op1.getType() != Op2.getType())
171     return false;
172 
173   switch (Op1.getType()) {
174   case MachineOperand::MO_Register:
175     return Op1.getReg() == Op2.getReg();
176   case MachineOperand::MO_Immediate:
177     return Op1.getImm() == Op2.getImm();
178   default:
179     return false;
180   }
181 }
182 
isZeroOperand(const MachineOperand & Op)183 bool isZeroOperand(const MachineOperand &Op) {
184   return ((Op.isReg() && Op.getReg() == Lanai::R0) ||
185           (Op.isImm() && Op.getImm() == 0));
186 }
187 
188 // Determines whether a register is used by an instruction.
InstrUsesReg(const MbbIterator & Instr,const MachineOperand * Reg)189 bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) {
190   for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin();
191        Mop != Instr->operands_end(); ++Mop) {
192     if (isSameOperand(*Mop, *Reg))
193       return true;
194   }
195   return false;
196 }
197 
198 // Converts between machine opcode and AluCode.
199 // Flag using/modifying ALU operations should not be considered for merging and
200 // are omitted from this list.
mergedAluCode(unsigned AluOpcode)201 LPAC::AluCode mergedAluCode(unsigned AluOpcode) {
202   switch (AluOpcode) {
203   case Lanai::ADD_I_LO:
204   case Lanai::ADD_R:
205     return LPAC::ADD;
206   case Lanai::SUB_I_LO:
207   case Lanai::SUB_R:
208     return LPAC::SUB;
209   case Lanai::AND_I_LO:
210   case Lanai::AND_R:
211     return LPAC::AND;
212   case Lanai::OR_I_LO:
213   case Lanai::OR_R:
214     return LPAC::OR;
215   case Lanai::XOR_I_LO:
216   case Lanai::XOR_R:
217     return LPAC::XOR;
218   case Lanai::SHL_R:
219     return LPAC::SHL;
220   case Lanai::SRL_R:
221     return LPAC::SRL;
222   case Lanai::SRA_R:
223     return LPAC::SRA;
224   case Lanai::SA_I:
225   case Lanai::SL_I:
226   default:
227     return LPAC::UNKNOWN;
228   }
229 }
230 
231 // Insert a new combined memory and ALU operation instruction.
232 //
233 // This function builds a new machine instruction using the MachineInstrBuilder
234 // class and inserts it before the memory instruction.
insertMergedInstruction(MachineBasicBlock * BB,const MbbIterator & MemInstr,const MbbIterator & AluInstr,bool Before)235 void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
236                                                   const MbbIterator &MemInstr,
237                                                   const MbbIterator &AluInstr,
238                                                   bool Before) {
239   // Insert new combined load/store + alu operation
240   MachineOperand Dest = MemInstr->getOperand(0);
241   MachineOperand Base = MemInstr->getOperand(1);
242   MachineOperand MemOffset = MemInstr->getOperand(2);
243   MachineOperand AluOffset = AluInstr->getOperand(2);
244 
245   // Abort if ALU offset is not a register or immediate
246   assert((AluOffset.isReg() || AluOffset.isImm()) &&
247          "Unsupported operand type in merge");
248 
249   // Determined merged instructions opcode and ALU code
250   LPAC::AluCode AluOpcode = mergedAluCode(AluInstr->getOpcode());
251   unsigned NewOpc = mergedOpcode(MemInstr->getOpcode(), AluOffset.isImm());
252 
253   assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging");
254   assert(NewOpc != 0 && "Unknown merged node opcode");
255 
256   // Build and insert new machine instruction
257   MachineInstrBuilder InstrBuilder =
258       BuildMI(*BB, MemInstr, MemInstr->getDebugLoc(), TII->get(NewOpc));
259   InstrBuilder.addReg(Dest.getReg(), getDefRegState(true));
260   InstrBuilder.addReg(Base.getReg(), getKillRegState(true));
261 
262   // Add offset to machine instruction
263   if (AluOffset.isReg())
264     InstrBuilder.addReg(AluOffset.getReg());
265   else if (AluOffset.isImm())
266     InstrBuilder.addImm(AluOffset.getImm());
267   else
268     llvm_unreachable("Unsupported ld/st ALU merge.");
269 
270   // Create a pre-op if the ALU operation preceded the memory operation or the
271   // MemOffset is non-zero (i.e. the memory value should be adjusted before
272   // accessing it), else create a post-op.
273   if (Before || !isZeroOperand(MemOffset))
274     InstrBuilder.addImm(LPAC::makePreOp(AluOpcode));
275   else
276     InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
277 
278   // Transfer memory operands.
279   InstrBuilder.setMemRefs(MemInstr->memoperands());
280 }
281 
282 // Function determines if ALU operation (in alu_iter) can be combined with
283 // a load/store with base and offset.
isSuitableAluInstr(bool IsSpls,const MbbIterator & AluIter,const MachineOperand & Base,const MachineOperand & Offset)284 bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter,
285                         const MachineOperand &Base,
286                         const MachineOperand &Offset) {
287   // ALU operations have 3 operands
288   if (AluIter->getNumOperands() != 3)
289     return false;
290 
291   MachineOperand &Dest = AluIter->getOperand(0);
292   MachineOperand &Op1 = AluIter->getOperand(1);
293   MachineOperand &Op2 = AluIter->getOperand(2);
294 
295   // Only match instructions using the base register as destination and with the
296   // base and first operand equal
297   if (!isSameOperand(Dest, Base) || !isSameOperand(Dest, Op1))
298     return false;
299 
300   if (Op2.isImm()) {
301     // It is not a match if the 2nd operand in the ALU operation is an
302     // immediate but the ALU operation is not an addition.
303     if (AluIter->getOpcode() != Lanai::ADD_I_LO)
304       return false;
305 
306     if (Offset.isReg() && Offset.getReg() == Lanai::R0)
307       return true;
308 
309     if (Offset.isImm() &&
310         ((Offset.getImm() == 0 &&
311           // Check that the Op2 would fit in the immediate field of the
312           // memory operation.
313           ((IsSpls && isInt<10>(Op2.getImm())) ||
314            (!IsSpls && isInt<16>(Op2.getImm())))) ||
315          Offset.getImm() == Op2.getImm()))
316       return true;
317   } else if (Op2.isReg()) {
318     // The Offset and 2nd operand are both registers and equal
319     if (Offset.isReg() && Op2.getReg() == Offset.getReg())
320       return true;
321   } else
322     // Only consider operations with register or immediate values
323     return false;
324 
325   return false;
326 }
327 
findClosestSuitableAluInstr(MachineBasicBlock * BB,const MbbIterator & MemInstr,const bool Decrement)328 MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr(
329     MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) {
330   MachineOperand *Base = &MemInstr->getOperand(1);
331   MachineOperand *Offset = &MemInstr->getOperand(2);
332   bool IsSpls = isSpls(MemInstr->getOpcode());
333 
334   MbbIterator First = MemInstr;
335   MbbIterator Last = Decrement ? BB->begin() : BB->end();
336 
337   while (First != Last) {
338     Decrement ? --First : ++First;
339 
340     if (First == Last)
341       break;
342 
343     // Skip over debug instructions
344     if (First->isDebugInstr())
345       continue;
346 
347     if (isSuitableAluInstr(IsSpls, First, *Base, *Offset)) {
348       return First;
349     }
350 
351     // Usage of the base or offset register is not a form suitable for merging.
352     if (First != Last) {
353       if (InstrUsesReg(First, Base))
354         break;
355       if (Offset->isReg() && InstrUsesReg(First, Offset))
356         break;
357     }
358   }
359 
360   return MemInstr;
361 }
362 
combineMemAluInBasicBlock(MachineBasicBlock * BB)363 bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) {
364   bool Modified = false;
365 
366   MbbIterator MBBIter = BB->begin(), End = BB->end();
367   while (MBBIter != End) {
368     bool IsMemOp = isNonVolatileMemoryOp(*MBBIter);
369 
370     if (IsMemOp) {
371       MachineOperand AluOperand = MBBIter->getOperand(3);
372       unsigned int DestReg = MBBIter->getOperand(0).getReg(),
373                    BaseReg = MBBIter->getOperand(1).getReg();
374       assert(AluOperand.isImm() && "Unexpected memory operator type");
375       LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm());
376 
377       // Skip memory operations that already modify the base register or if
378       // the destination and base register are the same
379       if (!LPAC::modifiesOp(AluOpcode) && DestReg != BaseReg) {
380         for (int Inc = 0; Inc <= 1; ++Inc) {
381           MbbIterator AluIter =
382               findClosestSuitableAluInstr(BB, MBBIter, Inc == 0);
383           if (AluIter != MBBIter) {
384             insertMergedInstruction(BB, MBBIter, AluIter, Inc == 0);
385 
386             ++NumLdStAluCombined;
387             Modified = true;
388 
389             // Erase the matching ALU instruction
390             BB->erase(AluIter);
391             // Erase old load/store instruction
392             BB->erase(MBBIter++);
393             break;
394           }
395         }
396       }
397     }
398     if (MBBIter == End)
399       break;
400     ++MBBIter;
401   }
402 
403   return Modified;
404 }
405 
406 // Driver function that iterates over the machine basic building blocks of a
407 // machine function
runOnMachineFunction(MachineFunction & MF)408 bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) {
409   if (DisableMemAluCombiner)
410     return false;
411 
412   TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo();
413   bool Modified = false;
414   for (MachineBasicBlock &MBB : MF)
415     Modified |= combineMemAluInBasicBlock(&MBB);
416   return Modified;
417 }
418 } // namespace
419 
createLanaiMemAluCombinerPass()420 FunctionPass *llvm::createLanaiMemAluCombinerPass() {
421   return new LanaiMemAluCombiner();
422 }
423