1f4a2713aSLionel Sambuc //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc //                     The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc // This file contains a pass that performs load / store related peephole
11f4a2713aSLionel Sambuc // optimizations. This pass should be run after register allocation.
12f4a2713aSLionel Sambuc //
13f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
14f4a2713aSLionel Sambuc 
15f4a2713aSLionel Sambuc #include "ARM.h"
16f4a2713aSLionel Sambuc #include "ARMBaseInstrInfo.h"
17f4a2713aSLionel Sambuc #include "ARMBaseRegisterInfo.h"
18*0a6a1f1dSLionel Sambuc #include "ARMISelLowering.h"
19f4a2713aSLionel Sambuc #include "ARMMachineFunctionInfo.h"
20*0a6a1f1dSLionel Sambuc #include "ARMSubtarget.h"
21f4a2713aSLionel Sambuc #include "MCTargetDesc/ARMAddressingModes.h"
22*0a6a1f1dSLionel Sambuc #include "Thumb1RegisterInfo.h"
23f4a2713aSLionel Sambuc #include "llvm/ADT/DenseMap.h"
24f4a2713aSLionel Sambuc #include "llvm/ADT/STLExtras.h"
25f4a2713aSLionel Sambuc #include "llvm/ADT/SmallPtrSet.h"
26f4a2713aSLionel Sambuc #include "llvm/ADT/SmallSet.h"
27f4a2713aSLionel Sambuc #include "llvm/ADT/SmallVector.h"
28f4a2713aSLionel Sambuc #include "llvm/ADT/Statistic.h"
29f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineBasicBlock.h"
30f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunctionPass.h"
31f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineInstr.h"
32f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineInstrBuilder.h"
33f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineRegisterInfo.h"
34f4a2713aSLionel Sambuc #include "llvm/CodeGen/RegisterScavenging.h"
35f4a2713aSLionel Sambuc #include "llvm/CodeGen/SelectionDAGNodes.h"
36f4a2713aSLionel Sambuc #include "llvm/IR/DataLayout.h"
37f4a2713aSLionel Sambuc #include "llvm/IR/DerivedTypes.h"
38f4a2713aSLionel Sambuc #include "llvm/IR/Function.h"
39f4a2713aSLionel Sambuc #include "llvm/Support/Debug.h"
40f4a2713aSLionel Sambuc #include "llvm/Support/ErrorHandling.h"
41f4a2713aSLionel Sambuc #include "llvm/Target/TargetInstrInfo.h"
42f4a2713aSLionel Sambuc #include "llvm/Target/TargetMachine.h"
43f4a2713aSLionel Sambuc #include "llvm/Target/TargetRegisterInfo.h"
44f4a2713aSLionel Sambuc using namespace llvm;
45f4a2713aSLionel Sambuc 
46*0a6a1f1dSLionel Sambuc #define DEBUG_TYPE "arm-ldst-opt"
47*0a6a1f1dSLionel Sambuc 
48f4a2713aSLionel Sambuc STATISTIC(NumLDMGened , "Number of ldm instructions generated");
49f4a2713aSLionel Sambuc STATISTIC(NumSTMGened , "Number of stm instructions generated");
50f4a2713aSLionel Sambuc STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
51f4a2713aSLionel Sambuc STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
52f4a2713aSLionel Sambuc STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
53f4a2713aSLionel Sambuc STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
54f4a2713aSLionel Sambuc STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
55f4a2713aSLionel Sambuc STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
56f4a2713aSLionel Sambuc STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
57f4a2713aSLionel Sambuc STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
58f4a2713aSLionel Sambuc STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
59f4a2713aSLionel Sambuc 
60f4a2713aSLionel Sambuc /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
61f4a2713aSLionel Sambuc /// load / store instructions to form ldm / stm instructions.
62f4a2713aSLionel Sambuc 
63f4a2713aSLionel Sambuc namespace {
64f4a2713aSLionel Sambuc   struct ARMLoadStoreOpt : public MachineFunctionPass {
65f4a2713aSLionel Sambuc     static char ID;
ARMLoadStoreOpt__anon90483a300111::ARMLoadStoreOpt66f4a2713aSLionel Sambuc     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
67f4a2713aSLionel Sambuc 
68f4a2713aSLionel Sambuc     const TargetInstrInfo *TII;
69f4a2713aSLionel Sambuc     const TargetRegisterInfo *TRI;
70f4a2713aSLionel Sambuc     const ARMSubtarget *STI;
71*0a6a1f1dSLionel Sambuc     const TargetLowering *TL;
72f4a2713aSLionel Sambuc     ARMFunctionInfo *AFI;
73f4a2713aSLionel Sambuc     RegScavenger *RS;
74*0a6a1f1dSLionel Sambuc     bool isThumb1, isThumb2;
75f4a2713aSLionel Sambuc 
76*0a6a1f1dSLionel Sambuc     bool runOnMachineFunction(MachineFunction &Fn) override;
77f4a2713aSLionel Sambuc 
getPassName__anon90483a300111::ARMLoadStoreOpt78*0a6a1f1dSLionel Sambuc     const char *getPassName() const override {
79f4a2713aSLionel Sambuc       return "ARM load / store optimization pass";
80f4a2713aSLionel Sambuc     }
81f4a2713aSLionel Sambuc 
82f4a2713aSLionel Sambuc   private:
83f4a2713aSLionel Sambuc     struct MemOpQueueEntry {
84f4a2713aSLionel Sambuc       int Offset;
85f4a2713aSLionel Sambuc       unsigned Reg;
86f4a2713aSLionel Sambuc       bool isKill;
87f4a2713aSLionel Sambuc       unsigned Position;
88f4a2713aSLionel Sambuc       MachineBasicBlock::iterator MBBI;
89f4a2713aSLionel Sambuc       bool Merged;
MemOpQueueEntry__anon90483a300111::ARMLoadStoreOpt::MemOpQueueEntry90f4a2713aSLionel Sambuc       MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91f4a2713aSLionel Sambuc                       MachineBasicBlock::iterator i)
92f4a2713aSLionel Sambuc         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
93f4a2713aSLionel Sambuc     };
94f4a2713aSLionel Sambuc     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95f4a2713aSLionel Sambuc     typedef MemOpQueue::iterator MemOpQueueIter;
96f4a2713aSLionel Sambuc 
97f4a2713aSLionel Sambuc     void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98f4a2713aSLionel Sambuc                           const MemOpQueue &MemOps, unsigned DefReg,
99f4a2713aSLionel Sambuc                           unsigned RangeBegin, unsigned RangeEnd);
100*0a6a1f1dSLionel Sambuc     void UpdateBaseRegUses(MachineBasicBlock &MBB,
101*0a6a1f1dSLionel Sambuc                            MachineBasicBlock::iterator MBBI,
102*0a6a1f1dSLionel Sambuc                            DebugLoc dl, unsigned Base, unsigned WordOffset,
103*0a6a1f1dSLionel Sambuc                            ARMCC::CondCodes Pred, unsigned PredReg);
104f4a2713aSLionel Sambuc     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105f4a2713aSLionel Sambuc                   int Offset, unsigned Base, bool BaseKill, int Opcode,
106f4a2713aSLionel Sambuc                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
107f4a2713aSLionel Sambuc                   DebugLoc dl,
108f4a2713aSLionel Sambuc                   ArrayRef<std::pair<unsigned, bool> > Regs,
109f4a2713aSLionel Sambuc                   ArrayRef<unsigned> ImpDefs);
110f4a2713aSLionel Sambuc     void MergeOpsUpdate(MachineBasicBlock &MBB,
111f4a2713aSLionel Sambuc                         MemOpQueue &MemOps,
112f4a2713aSLionel Sambuc                         unsigned memOpsBegin,
113f4a2713aSLionel Sambuc                         unsigned memOpsEnd,
114f4a2713aSLionel Sambuc                         unsigned insertAfter,
115f4a2713aSLionel Sambuc                         int Offset,
116f4a2713aSLionel Sambuc                         unsigned Base,
117f4a2713aSLionel Sambuc                         bool BaseKill,
118f4a2713aSLionel Sambuc                         int Opcode,
119f4a2713aSLionel Sambuc                         ARMCC::CondCodes Pred,
120f4a2713aSLionel Sambuc                         unsigned PredReg,
121f4a2713aSLionel Sambuc                         unsigned Scratch,
122f4a2713aSLionel Sambuc                         DebugLoc dl,
123f4a2713aSLionel Sambuc                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124f4a2713aSLionel Sambuc     void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125f4a2713aSLionel Sambuc                       int Opcode, unsigned Size,
126f4a2713aSLionel Sambuc                       ARMCC::CondCodes Pred, unsigned PredReg,
127f4a2713aSLionel Sambuc                       unsigned Scratch, MemOpQueue &MemOps,
128f4a2713aSLionel Sambuc                       SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129f4a2713aSLionel Sambuc     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130f4a2713aSLionel Sambuc     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131f4a2713aSLionel Sambuc                              MachineBasicBlock::iterator &MBBI);
132f4a2713aSLionel Sambuc     bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133f4a2713aSLionel Sambuc                                   MachineBasicBlock::iterator MBBI,
134f4a2713aSLionel Sambuc                                   const TargetInstrInfo *TII,
135f4a2713aSLionel Sambuc                                   bool &Advance,
136f4a2713aSLionel Sambuc                                   MachineBasicBlock::iterator &I);
137f4a2713aSLionel Sambuc     bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138f4a2713aSLionel Sambuc                                    MachineBasicBlock::iterator MBBI,
139f4a2713aSLionel Sambuc                                    bool &Advance,
140f4a2713aSLionel Sambuc                                    MachineBasicBlock::iterator &I);
141f4a2713aSLionel Sambuc     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142f4a2713aSLionel Sambuc     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
143f4a2713aSLionel Sambuc   };
144f4a2713aSLionel Sambuc   char ARMLoadStoreOpt::ID = 0;
145f4a2713aSLionel Sambuc }
146f4a2713aSLionel Sambuc 
definesCPSR(const MachineInstr * MI)147*0a6a1f1dSLionel Sambuc static bool definesCPSR(const MachineInstr *MI) {
148*0a6a1f1dSLionel Sambuc   for (const auto &MO : MI->operands()) {
149*0a6a1f1dSLionel Sambuc     if (!MO.isReg())
150*0a6a1f1dSLionel Sambuc       continue;
151*0a6a1f1dSLionel Sambuc     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
152*0a6a1f1dSLionel Sambuc       // If the instruction has live CPSR def, then it's not safe to fold it
153*0a6a1f1dSLionel Sambuc       // into load / store.
154*0a6a1f1dSLionel Sambuc       return true;
155*0a6a1f1dSLionel Sambuc   }
156*0a6a1f1dSLionel Sambuc 
157*0a6a1f1dSLionel Sambuc   return false;
158*0a6a1f1dSLionel Sambuc }
159*0a6a1f1dSLionel Sambuc 
getMemoryOpOffset(const MachineInstr * MI)160*0a6a1f1dSLionel Sambuc static int getMemoryOpOffset(const MachineInstr *MI) {
161*0a6a1f1dSLionel Sambuc   int Opcode = MI->getOpcode();
162*0a6a1f1dSLionel Sambuc   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
163*0a6a1f1dSLionel Sambuc   unsigned NumOperands = MI->getDesc().getNumOperands();
164*0a6a1f1dSLionel Sambuc   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
165*0a6a1f1dSLionel Sambuc 
166*0a6a1f1dSLionel Sambuc   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
167*0a6a1f1dSLionel Sambuc       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
168*0a6a1f1dSLionel Sambuc       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
169*0a6a1f1dSLionel Sambuc       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
170*0a6a1f1dSLionel Sambuc     return OffField;
171*0a6a1f1dSLionel Sambuc 
172*0a6a1f1dSLionel Sambuc   // Thumb1 immediate offsets are scaled by 4
173*0a6a1f1dSLionel Sambuc   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
174*0a6a1f1dSLionel Sambuc     return OffField * 4;
175*0a6a1f1dSLionel Sambuc 
176*0a6a1f1dSLionel Sambuc   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
177*0a6a1f1dSLionel Sambuc     : ARM_AM::getAM5Offset(OffField) * 4;
178*0a6a1f1dSLionel Sambuc   ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
179*0a6a1f1dSLionel Sambuc     : ARM_AM::getAM5Op(OffField);
180*0a6a1f1dSLionel Sambuc 
181*0a6a1f1dSLionel Sambuc   if (Op == ARM_AM::sub)
182*0a6a1f1dSLionel Sambuc     return -Offset;
183*0a6a1f1dSLionel Sambuc 
184*0a6a1f1dSLionel Sambuc   return Offset;
185*0a6a1f1dSLionel Sambuc }
186*0a6a1f1dSLionel Sambuc 
getLoadStoreMultipleOpcode(int Opcode,ARM_AM::AMSubMode Mode)187f4a2713aSLionel Sambuc static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
188f4a2713aSLionel Sambuc   switch (Opcode) {
189f4a2713aSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
190f4a2713aSLionel Sambuc   case ARM::LDRi12:
191f4a2713aSLionel Sambuc     ++NumLDMGened;
192f4a2713aSLionel Sambuc     switch (Mode) {
193f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
194f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::LDMIA;
195f4a2713aSLionel Sambuc     case ARM_AM::da: return ARM::LDMDA;
196f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::LDMDB;
197f4a2713aSLionel Sambuc     case ARM_AM::ib: return ARM::LDMIB;
198f4a2713aSLionel Sambuc     }
199f4a2713aSLionel Sambuc   case ARM::STRi12:
200f4a2713aSLionel Sambuc     ++NumSTMGened;
201f4a2713aSLionel Sambuc     switch (Mode) {
202f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
203f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::STMIA;
204f4a2713aSLionel Sambuc     case ARM_AM::da: return ARM::STMDA;
205f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::STMDB;
206f4a2713aSLionel Sambuc     case ARM_AM::ib: return ARM::STMIB;
207f4a2713aSLionel Sambuc     }
208*0a6a1f1dSLionel Sambuc   case ARM::tLDRi:
209*0a6a1f1dSLionel Sambuc     // tLDMIA is writeback-only - unless the base register is in the input
210*0a6a1f1dSLionel Sambuc     // reglist.
211*0a6a1f1dSLionel Sambuc     ++NumLDMGened;
212*0a6a1f1dSLionel Sambuc     switch (Mode) {
213*0a6a1f1dSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
214*0a6a1f1dSLionel Sambuc     case ARM_AM::ia: return ARM::tLDMIA;
215*0a6a1f1dSLionel Sambuc     }
216*0a6a1f1dSLionel Sambuc   case ARM::tSTRi:
217*0a6a1f1dSLionel Sambuc     // There is no non-writeback tSTMIA either.
218*0a6a1f1dSLionel Sambuc     ++NumSTMGened;
219*0a6a1f1dSLionel Sambuc     switch (Mode) {
220*0a6a1f1dSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
221*0a6a1f1dSLionel Sambuc     case ARM_AM::ia: return ARM::tSTMIA_UPD;
222*0a6a1f1dSLionel Sambuc     }
223f4a2713aSLionel Sambuc   case ARM::t2LDRi8:
224f4a2713aSLionel Sambuc   case ARM::t2LDRi12:
225f4a2713aSLionel Sambuc     ++NumLDMGened;
226f4a2713aSLionel Sambuc     switch (Mode) {
227f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
228f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::t2LDMIA;
229f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::t2LDMDB;
230f4a2713aSLionel Sambuc     }
231f4a2713aSLionel Sambuc   case ARM::t2STRi8:
232f4a2713aSLionel Sambuc   case ARM::t2STRi12:
233f4a2713aSLionel Sambuc     ++NumSTMGened;
234f4a2713aSLionel Sambuc     switch (Mode) {
235f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
236f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::t2STMIA;
237f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::t2STMDB;
238f4a2713aSLionel Sambuc     }
239f4a2713aSLionel Sambuc   case ARM::VLDRS:
240f4a2713aSLionel Sambuc     ++NumVLDMGened;
241f4a2713aSLionel Sambuc     switch (Mode) {
242f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
243f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VLDMSIA;
244f4a2713aSLionel Sambuc     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
245f4a2713aSLionel Sambuc     }
246f4a2713aSLionel Sambuc   case ARM::VSTRS:
247f4a2713aSLionel Sambuc     ++NumVSTMGened;
248f4a2713aSLionel Sambuc     switch (Mode) {
249f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
250f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VSTMSIA;
251f4a2713aSLionel Sambuc     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
252f4a2713aSLionel Sambuc     }
253f4a2713aSLionel Sambuc   case ARM::VLDRD:
254f4a2713aSLionel Sambuc     ++NumVLDMGened;
255f4a2713aSLionel Sambuc     switch (Mode) {
256f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
257f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VLDMDIA;
258f4a2713aSLionel Sambuc     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
259f4a2713aSLionel Sambuc     }
260f4a2713aSLionel Sambuc   case ARM::VSTRD:
261f4a2713aSLionel Sambuc     ++NumVSTMGened;
262f4a2713aSLionel Sambuc     switch (Mode) {
263f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
264f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VSTMDIA;
265f4a2713aSLionel Sambuc     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
266f4a2713aSLionel Sambuc     }
267f4a2713aSLionel Sambuc   }
268f4a2713aSLionel Sambuc }
269f4a2713aSLionel Sambuc 
270f4a2713aSLionel Sambuc namespace llvm {
271f4a2713aSLionel Sambuc   namespace ARM_AM {
272f4a2713aSLionel Sambuc 
getLoadStoreMultipleSubMode(int Opcode)273f4a2713aSLionel Sambuc AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
274f4a2713aSLionel Sambuc   switch (Opcode) {
275f4a2713aSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
276f4a2713aSLionel Sambuc   case ARM::LDMIA_RET:
277f4a2713aSLionel Sambuc   case ARM::LDMIA:
278f4a2713aSLionel Sambuc   case ARM::LDMIA_UPD:
279f4a2713aSLionel Sambuc   case ARM::STMIA:
280f4a2713aSLionel Sambuc   case ARM::STMIA_UPD:
281*0a6a1f1dSLionel Sambuc   case ARM::tLDMIA:
282*0a6a1f1dSLionel Sambuc   case ARM::tLDMIA_UPD:
283*0a6a1f1dSLionel Sambuc   case ARM::tSTMIA_UPD:
284f4a2713aSLionel Sambuc   case ARM::t2LDMIA_RET:
285f4a2713aSLionel Sambuc   case ARM::t2LDMIA:
286f4a2713aSLionel Sambuc   case ARM::t2LDMIA_UPD:
287f4a2713aSLionel Sambuc   case ARM::t2STMIA:
288f4a2713aSLionel Sambuc   case ARM::t2STMIA_UPD:
289f4a2713aSLionel Sambuc   case ARM::VLDMSIA:
290f4a2713aSLionel Sambuc   case ARM::VLDMSIA_UPD:
291f4a2713aSLionel Sambuc   case ARM::VSTMSIA:
292f4a2713aSLionel Sambuc   case ARM::VSTMSIA_UPD:
293f4a2713aSLionel Sambuc   case ARM::VLDMDIA:
294f4a2713aSLionel Sambuc   case ARM::VLDMDIA_UPD:
295f4a2713aSLionel Sambuc   case ARM::VSTMDIA:
296f4a2713aSLionel Sambuc   case ARM::VSTMDIA_UPD:
297f4a2713aSLionel Sambuc     return ARM_AM::ia;
298f4a2713aSLionel Sambuc 
299f4a2713aSLionel Sambuc   case ARM::LDMDA:
300f4a2713aSLionel Sambuc   case ARM::LDMDA_UPD:
301f4a2713aSLionel Sambuc   case ARM::STMDA:
302f4a2713aSLionel Sambuc   case ARM::STMDA_UPD:
303f4a2713aSLionel Sambuc     return ARM_AM::da;
304f4a2713aSLionel Sambuc 
305f4a2713aSLionel Sambuc   case ARM::LDMDB:
306f4a2713aSLionel Sambuc   case ARM::LDMDB_UPD:
307f4a2713aSLionel Sambuc   case ARM::STMDB:
308f4a2713aSLionel Sambuc   case ARM::STMDB_UPD:
309f4a2713aSLionel Sambuc   case ARM::t2LDMDB:
310f4a2713aSLionel Sambuc   case ARM::t2LDMDB_UPD:
311f4a2713aSLionel Sambuc   case ARM::t2STMDB:
312f4a2713aSLionel Sambuc   case ARM::t2STMDB_UPD:
313f4a2713aSLionel Sambuc   case ARM::VLDMSDB_UPD:
314f4a2713aSLionel Sambuc   case ARM::VSTMSDB_UPD:
315f4a2713aSLionel Sambuc   case ARM::VLDMDDB_UPD:
316f4a2713aSLionel Sambuc   case ARM::VSTMDDB_UPD:
317f4a2713aSLionel Sambuc     return ARM_AM::db;
318f4a2713aSLionel Sambuc 
319f4a2713aSLionel Sambuc   case ARM::LDMIB:
320f4a2713aSLionel Sambuc   case ARM::LDMIB_UPD:
321f4a2713aSLionel Sambuc   case ARM::STMIB:
322f4a2713aSLionel Sambuc   case ARM::STMIB_UPD:
323f4a2713aSLionel Sambuc     return ARM_AM::ib;
324f4a2713aSLionel Sambuc   }
325f4a2713aSLionel Sambuc }
326f4a2713aSLionel Sambuc 
327f4a2713aSLionel Sambuc   } // end namespace ARM_AM
328f4a2713aSLionel Sambuc } // end namespace llvm
329f4a2713aSLionel Sambuc 
isT1i32Load(unsigned Opc)330*0a6a1f1dSLionel Sambuc static bool isT1i32Load(unsigned Opc) {
331*0a6a1f1dSLionel Sambuc   return Opc == ARM::tLDRi;
332*0a6a1f1dSLionel Sambuc }
333*0a6a1f1dSLionel Sambuc 
isT2i32Load(unsigned Opc)334f4a2713aSLionel Sambuc static bool isT2i32Load(unsigned Opc) {
335f4a2713aSLionel Sambuc   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
336f4a2713aSLionel Sambuc }
337f4a2713aSLionel Sambuc 
isi32Load(unsigned Opc)338f4a2713aSLionel Sambuc static bool isi32Load(unsigned Opc) {
339*0a6a1f1dSLionel Sambuc   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
340*0a6a1f1dSLionel Sambuc }
341*0a6a1f1dSLionel Sambuc 
isT1i32Store(unsigned Opc)342*0a6a1f1dSLionel Sambuc static bool isT1i32Store(unsigned Opc) {
343*0a6a1f1dSLionel Sambuc   return Opc == ARM::tSTRi;
344f4a2713aSLionel Sambuc }
345f4a2713aSLionel Sambuc 
isT2i32Store(unsigned Opc)346f4a2713aSLionel Sambuc static bool isT2i32Store(unsigned Opc) {
347f4a2713aSLionel Sambuc   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
348f4a2713aSLionel Sambuc }
349f4a2713aSLionel Sambuc 
isi32Store(unsigned Opc)350f4a2713aSLionel Sambuc static bool isi32Store(unsigned Opc) {
351*0a6a1f1dSLionel Sambuc   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
352*0a6a1f1dSLionel Sambuc }
353*0a6a1f1dSLionel Sambuc 
getImmScale(unsigned Opc)354*0a6a1f1dSLionel Sambuc static unsigned getImmScale(unsigned Opc) {
355*0a6a1f1dSLionel Sambuc   switch (Opc) {
356*0a6a1f1dSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
357*0a6a1f1dSLionel Sambuc   case ARM::tLDRi:
358*0a6a1f1dSLionel Sambuc   case ARM::tSTRi:
359*0a6a1f1dSLionel Sambuc     return 1;
360*0a6a1f1dSLionel Sambuc   case ARM::tLDRHi:
361*0a6a1f1dSLionel Sambuc   case ARM::tSTRHi:
362*0a6a1f1dSLionel Sambuc     return 2;
363*0a6a1f1dSLionel Sambuc   case ARM::tLDRBi:
364*0a6a1f1dSLionel Sambuc   case ARM::tSTRBi:
365*0a6a1f1dSLionel Sambuc     return 4;
366*0a6a1f1dSLionel Sambuc   }
367*0a6a1f1dSLionel Sambuc }
368*0a6a1f1dSLionel Sambuc 
369*0a6a1f1dSLionel Sambuc /// Update future uses of the base register with the offset introduced
370*0a6a1f1dSLionel Sambuc /// due to writeback. This function only works on Thumb1.
371*0a6a1f1dSLionel Sambuc void
UpdateBaseRegUses(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc dl,unsigned Base,unsigned WordOffset,ARMCC::CondCodes Pred,unsigned PredReg)372*0a6a1f1dSLionel Sambuc ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
373*0a6a1f1dSLionel Sambuc                                    MachineBasicBlock::iterator MBBI,
374*0a6a1f1dSLionel Sambuc                                    DebugLoc dl, unsigned Base,
375*0a6a1f1dSLionel Sambuc                                    unsigned WordOffset,
376*0a6a1f1dSLionel Sambuc                                    ARMCC::CondCodes Pred, unsigned PredReg) {
377*0a6a1f1dSLionel Sambuc   assert(isThumb1 && "Can only update base register uses for Thumb1!");
378*0a6a1f1dSLionel Sambuc   // Start updating any instructions with immediate offsets. Insert a SUB before
379*0a6a1f1dSLionel Sambuc   // the first non-updateable instruction (if any).
380*0a6a1f1dSLionel Sambuc   for (; MBBI != MBB.end(); ++MBBI) {
381*0a6a1f1dSLionel Sambuc     bool InsertSub = false;
382*0a6a1f1dSLionel Sambuc     unsigned Opc = MBBI->getOpcode();
383*0a6a1f1dSLionel Sambuc 
384*0a6a1f1dSLionel Sambuc     if (MBBI->readsRegister(Base)) {
385*0a6a1f1dSLionel Sambuc       int Offset;
386*0a6a1f1dSLionel Sambuc       bool IsLoad =
387*0a6a1f1dSLionel Sambuc         Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
388*0a6a1f1dSLionel Sambuc       bool IsStore =
389*0a6a1f1dSLionel Sambuc         Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
390*0a6a1f1dSLionel Sambuc 
391*0a6a1f1dSLionel Sambuc       if (IsLoad || IsStore) {
392*0a6a1f1dSLionel Sambuc         // Loads and stores with immediate offsets can be updated, but only if
393*0a6a1f1dSLionel Sambuc         // the new offset isn't negative.
394*0a6a1f1dSLionel Sambuc         // The MachineOperand containing the offset immediate is the last one
395*0a6a1f1dSLionel Sambuc         // before predicates.
396*0a6a1f1dSLionel Sambuc         MachineOperand &MO =
397*0a6a1f1dSLionel Sambuc           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
398*0a6a1f1dSLionel Sambuc         // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
399*0a6a1f1dSLionel Sambuc         Offset = MO.getImm() - WordOffset * getImmScale(Opc);
400*0a6a1f1dSLionel Sambuc 
401*0a6a1f1dSLionel Sambuc         // If storing the base register, it needs to be reset first.
402*0a6a1f1dSLionel Sambuc         unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
403*0a6a1f1dSLionel Sambuc 
404*0a6a1f1dSLionel Sambuc         if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
405*0a6a1f1dSLionel Sambuc           MO.setImm(Offset);
406*0a6a1f1dSLionel Sambuc         else
407*0a6a1f1dSLionel Sambuc           InsertSub = true;
408*0a6a1f1dSLionel Sambuc 
409*0a6a1f1dSLionel Sambuc       } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
410*0a6a1f1dSLionel Sambuc                  !definesCPSR(MBBI)) {
411*0a6a1f1dSLionel Sambuc         // SUBS/ADDS using this register, with a dead def of the CPSR.
412*0a6a1f1dSLionel Sambuc         // Merge it with the update; if the merged offset is too large,
413*0a6a1f1dSLionel Sambuc         // insert a new sub instead.
414*0a6a1f1dSLionel Sambuc         MachineOperand &MO =
415*0a6a1f1dSLionel Sambuc           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
416*0a6a1f1dSLionel Sambuc         Offset = (Opc == ARM::tSUBi8) ?
417*0a6a1f1dSLionel Sambuc           MO.getImm() + WordOffset * 4 :
418*0a6a1f1dSLionel Sambuc           MO.getImm() - WordOffset * 4 ;
419*0a6a1f1dSLionel Sambuc         if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
420*0a6a1f1dSLionel Sambuc           // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
421*0a6a1f1dSLionel Sambuc           // Offset == 0.
422*0a6a1f1dSLionel Sambuc           MO.setImm(Offset);
423*0a6a1f1dSLionel Sambuc           // The base register has now been reset, so exit early.
424*0a6a1f1dSLionel Sambuc           return;
425*0a6a1f1dSLionel Sambuc         } else {
426*0a6a1f1dSLionel Sambuc           InsertSub = true;
427*0a6a1f1dSLionel Sambuc         }
428*0a6a1f1dSLionel Sambuc 
429*0a6a1f1dSLionel Sambuc       } else {
430*0a6a1f1dSLionel Sambuc         // Can't update the instruction.
431*0a6a1f1dSLionel Sambuc         InsertSub = true;
432*0a6a1f1dSLionel Sambuc       }
433*0a6a1f1dSLionel Sambuc 
434*0a6a1f1dSLionel Sambuc     } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
435*0a6a1f1dSLionel Sambuc       // Since SUBS sets the condition flags, we can't place the base reset
436*0a6a1f1dSLionel Sambuc       // after an instruction that has a live CPSR def.
437*0a6a1f1dSLionel Sambuc       // The base register might also contain an argument for a function call.
438*0a6a1f1dSLionel Sambuc       InsertSub = true;
439*0a6a1f1dSLionel Sambuc     }
440*0a6a1f1dSLionel Sambuc 
441*0a6a1f1dSLionel Sambuc     if (InsertSub) {
442*0a6a1f1dSLionel Sambuc       // An instruction above couldn't be updated, so insert a sub.
443*0a6a1f1dSLionel Sambuc       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
444*0a6a1f1dSLionel Sambuc         .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
445*0a6a1f1dSLionel Sambuc         .addImm(Pred).addReg(PredReg);
446*0a6a1f1dSLionel Sambuc       return;
447*0a6a1f1dSLionel Sambuc     }
448*0a6a1f1dSLionel Sambuc 
449*0a6a1f1dSLionel Sambuc     if (MBBI->killsRegister(Base))
450*0a6a1f1dSLionel Sambuc       // Register got killed. Stop updating.
451*0a6a1f1dSLionel Sambuc       return;
452*0a6a1f1dSLionel Sambuc   }
453*0a6a1f1dSLionel Sambuc 
454*0a6a1f1dSLionel Sambuc   // End of block was reached.
455*0a6a1f1dSLionel Sambuc   if (MBB.succ_size() > 0) {
456*0a6a1f1dSLionel Sambuc     // FIXME: Because of a bug, live registers are sometimes missing from
457*0a6a1f1dSLionel Sambuc     // the successor blocks' live-in sets. This means we can't trust that
458*0a6a1f1dSLionel Sambuc     // information and *always* have to reset at the end of a block.
459*0a6a1f1dSLionel Sambuc     // See PR21029.
460*0a6a1f1dSLionel Sambuc     if (MBBI != MBB.end()) --MBBI;
461*0a6a1f1dSLionel Sambuc     AddDefaultT1CC(
462*0a6a1f1dSLionel Sambuc       BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
463*0a6a1f1dSLionel Sambuc       .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
464*0a6a1f1dSLionel Sambuc       .addImm(Pred).addReg(PredReg);
465*0a6a1f1dSLionel Sambuc   }
466f4a2713aSLionel Sambuc }
467f4a2713aSLionel Sambuc 
468f4a2713aSLionel Sambuc /// MergeOps - Create and insert a LDM or STM with Base as base register and
469f4a2713aSLionel Sambuc /// registers in Regs as the register operands that would be loaded / stored.
470f4a2713aSLionel Sambuc /// It returns true if the transformation is done.
471f4a2713aSLionel Sambuc bool
MergeOps(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,ArrayRef<std::pair<unsigned,bool>> Regs,ArrayRef<unsigned> ImpDefs)472f4a2713aSLionel Sambuc ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
473f4a2713aSLionel Sambuc                           MachineBasicBlock::iterator MBBI,
474f4a2713aSLionel Sambuc                           int Offset, unsigned Base, bool BaseKill,
475f4a2713aSLionel Sambuc                           int Opcode, ARMCC::CondCodes Pred,
476f4a2713aSLionel Sambuc                           unsigned PredReg, unsigned Scratch, DebugLoc dl,
477f4a2713aSLionel Sambuc                           ArrayRef<std::pair<unsigned, bool> > Regs,
478f4a2713aSLionel Sambuc                           ArrayRef<unsigned> ImpDefs) {
479f4a2713aSLionel Sambuc   // Only a single register to load / store. Don't bother.
480f4a2713aSLionel Sambuc   unsigned NumRegs = Regs.size();
481f4a2713aSLionel Sambuc   if (NumRegs <= 1)
482f4a2713aSLionel Sambuc     return false;
483f4a2713aSLionel Sambuc 
484*0a6a1f1dSLionel Sambuc   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
485*0a6a1f1dSLionel Sambuc   // Compute liveness information for that register to make the decision.
486*0a6a1f1dSLionel Sambuc   bool SafeToClobberCPSR = !isThumb1 ||
487*0a6a1f1dSLionel Sambuc     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
488*0a6a1f1dSLionel Sambuc      MachineBasicBlock::LQR_Dead);
489*0a6a1f1dSLionel Sambuc 
490*0a6a1f1dSLionel Sambuc   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
491*0a6a1f1dSLionel Sambuc 
492*0a6a1f1dSLionel Sambuc   // Exception: If the base register is in the input reglist, Thumb1 LDM is
493*0a6a1f1dSLionel Sambuc   // non-writeback.
494*0a6a1f1dSLionel Sambuc   // It's also not possible to merge an STR of the base register in Thumb1.
495*0a6a1f1dSLionel Sambuc   if (isThumb1)
496*0a6a1f1dSLionel Sambuc     for (unsigned I = 0; I < NumRegs; ++I)
497*0a6a1f1dSLionel Sambuc       if (Base == Regs[I].first) {
498*0a6a1f1dSLionel Sambuc         if (Opcode == ARM::tLDRi) {
499*0a6a1f1dSLionel Sambuc           Writeback = false;
500*0a6a1f1dSLionel Sambuc           break;
501*0a6a1f1dSLionel Sambuc         } else if (Opcode == ARM::tSTRi) {
502*0a6a1f1dSLionel Sambuc           return false;
503*0a6a1f1dSLionel Sambuc         }
504*0a6a1f1dSLionel Sambuc       }
505*0a6a1f1dSLionel Sambuc 
506f4a2713aSLionel Sambuc   ARM_AM::AMSubMode Mode = ARM_AM::ia;
507*0a6a1f1dSLionel Sambuc   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
508f4a2713aSLionel Sambuc   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
509*0a6a1f1dSLionel Sambuc   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
510*0a6a1f1dSLionel Sambuc 
511*0a6a1f1dSLionel Sambuc   if (Offset == 4 && haveIBAndDA) {
512f4a2713aSLionel Sambuc     Mode = ARM_AM::ib;
513*0a6a1f1dSLionel Sambuc   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
514f4a2713aSLionel Sambuc     Mode = ARM_AM::da;
515*0a6a1f1dSLionel Sambuc   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
516f4a2713aSLionel Sambuc     // VLDM/VSTM do not support DB mode without also updating the base reg.
517f4a2713aSLionel Sambuc     Mode = ARM_AM::db;
518*0a6a1f1dSLionel Sambuc   } else if (Offset != 0) {
519*0a6a1f1dSLionel Sambuc     // Check if this is a supported opcode before inserting instructions to
520f4a2713aSLionel Sambuc     // calculate a new base register.
521f4a2713aSLionel Sambuc     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
522f4a2713aSLionel Sambuc 
523f4a2713aSLionel Sambuc     // If starting offset isn't zero, insert a MI to materialize a new base.
524f4a2713aSLionel Sambuc     // But only do so if it is cost effective, i.e. merging more than two
525f4a2713aSLionel Sambuc     // loads / stores.
526f4a2713aSLionel Sambuc     if (NumRegs <= 2)
527f4a2713aSLionel Sambuc       return false;
528f4a2713aSLionel Sambuc 
529*0a6a1f1dSLionel Sambuc     // On Thumb1, it's not worth materializing a new base register without
530*0a6a1f1dSLionel Sambuc     // clobbering the CPSR (i.e. not using ADDS/SUBS).
531*0a6a1f1dSLionel Sambuc     if (!SafeToClobberCPSR)
532*0a6a1f1dSLionel Sambuc       return false;
533*0a6a1f1dSLionel Sambuc 
534f4a2713aSLionel Sambuc     unsigned NewBase;
535*0a6a1f1dSLionel Sambuc     if (isi32Load(Opcode)) {
536f4a2713aSLionel Sambuc       // If it is a load, then just use one of the destination register to
537f4a2713aSLionel Sambuc       // use as the new base.
538f4a2713aSLionel Sambuc       NewBase = Regs[NumRegs-1].first;
539*0a6a1f1dSLionel Sambuc     } else {
540f4a2713aSLionel Sambuc       // Use the scratch register to use as a new base.
541f4a2713aSLionel Sambuc       NewBase = Scratch;
542f4a2713aSLionel Sambuc       if (NewBase == 0)
543f4a2713aSLionel Sambuc         return false;
544f4a2713aSLionel Sambuc     }
545*0a6a1f1dSLionel Sambuc 
546*0a6a1f1dSLionel Sambuc     int BaseOpc =
547*0a6a1f1dSLionel Sambuc       isThumb2 ? ARM::t2ADDri :
548*0a6a1f1dSLionel Sambuc       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
549*0a6a1f1dSLionel Sambuc       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
550*0a6a1f1dSLionel Sambuc 
551f4a2713aSLionel Sambuc     if (Offset < 0) {
552f4a2713aSLionel Sambuc       Offset = - Offset;
553*0a6a1f1dSLionel Sambuc       BaseOpc =
554*0a6a1f1dSLionel Sambuc         isThumb2 ? ARM::t2SUBri :
555*0a6a1f1dSLionel Sambuc         (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
556*0a6a1f1dSLionel Sambuc         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
557f4a2713aSLionel Sambuc     }
558*0a6a1f1dSLionel Sambuc 
559*0a6a1f1dSLionel Sambuc     if (!TL->isLegalAddImmediate(Offset))
560*0a6a1f1dSLionel Sambuc       // FIXME: Try add with register operand?
561f4a2713aSLionel Sambuc       return false; // Probably not worth it then.
562f4a2713aSLionel Sambuc 
563*0a6a1f1dSLionel Sambuc     if (isThumb1) {
564*0a6a1f1dSLionel Sambuc       // Thumb1: depending on immediate size, use either
565*0a6a1f1dSLionel Sambuc       //   ADDS NewBase, Base, #imm3
566*0a6a1f1dSLionel Sambuc       // or
567*0a6a1f1dSLionel Sambuc       //   MOV  NewBase, Base
568*0a6a1f1dSLionel Sambuc       //   ADDS NewBase, #imm8.
569*0a6a1f1dSLionel Sambuc       if (Base != NewBase && Offset >= 8) {
570*0a6a1f1dSLionel Sambuc         const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
571*0a6a1f1dSLionel Sambuc                        .getSubtarget<ARMSubtarget>();
572*0a6a1f1dSLionel Sambuc         // Need to insert a MOV to the new base first.
573*0a6a1f1dSLionel Sambuc         if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
574*0a6a1f1dSLionel Sambuc             !Subtarget.hasV6Ops()) {
575*0a6a1f1dSLionel Sambuc           // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
576*0a6a1f1dSLionel Sambuc           if (Pred != ARMCC::AL)
577*0a6a1f1dSLionel Sambuc             return false;
578*0a6a1f1dSLionel Sambuc           BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
579*0a6a1f1dSLionel Sambuc             .addReg(Base, getKillRegState(BaseKill));
580*0a6a1f1dSLionel Sambuc         } else
581*0a6a1f1dSLionel Sambuc           BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
582*0a6a1f1dSLionel Sambuc             .addReg(Base, getKillRegState(BaseKill))
583*0a6a1f1dSLionel Sambuc             .addImm(Pred).addReg(PredReg);
584*0a6a1f1dSLionel Sambuc 
585*0a6a1f1dSLionel Sambuc         // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
586*0a6a1f1dSLionel Sambuc         Base = NewBase;
587*0a6a1f1dSLionel Sambuc         BaseKill = false;
588*0a6a1f1dSLionel Sambuc       }
589*0a6a1f1dSLionel Sambuc       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
590*0a6a1f1dSLionel Sambuc         .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
591*0a6a1f1dSLionel Sambuc         .addImm(Pred).addReg(PredReg);
592*0a6a1f1dSLionel Sambuc     } else {
593f4a2713aSLionel Sambuc       BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
594f4a2713aSLionel Sambuc         .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
595f4a2713aSLionel Sambuc         .addImm(Pred).addReg(PredReg).addReg(0);
596*0a6a1f1dSLionel Sambuc     }
597f4a2713aSLionel Sambuc     Base = NewBase;
598*0a6a1f1dSLionel Sambuc     BaseKill = true; // New base is always killed straight away.
599f4a2713aSLionel Sambuc   }
600f4a2713aSLionel Sambuc 
601f4a2713aSLionel Sambuc   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
602f4a2713aSLionel Sambuc                 Opcode == ARM::VLDRD);
603*0a6a1f1dSLionel Sambuc 
604*0a6a1f1dSLionel Sambuc   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
605*0a6a1f1dSLionel Sambuc   // base register writeback.
606f4a2713aSLionel Sambuc   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
607f4a2713aSLionel Sambuc   if (!Opcode) return false;
608*0a6a1f1dSLionel Sambuc 
609*0a6a1f1dSLionel Sambuc   // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
610*0a6a1f1dSLionel Sambuc   // - There is no writeback (LDM of base register),
611*0a6a1f1dSLionel Sambuc   // - the base register is killed by the merged instruction,
612*0a6a1f1dSLionel Sambuc   // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
613*0a6a1f1dSLionel Sambuc   //   to reset the base register.
614*0a6a1f1dSLionel Sambuc   // Otherwise, don't merge.
615*0a6a1f1dSLionel Sambuc   // It's safe to return here since the code to materialize a new base register
616*0a6a1f1dSLionel Sambuc   // above is also conditional on SafeToClobberCPSR.
617*0a6a1f1dSLionel Sambuc   if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
618*0a6a1f1dSLionel Sambuc     return false;
619*0a6a1f1dSLionel Sambuc 
620*0a6a1f1dSLionel Sambuc   MachineInstrBuilder MIB;
621*0a6a1f1dSLionel Sambuc 
622*0a6a1f1dSLionel Sambuc   if (Writeback) {
623*0a6a1f1dSLionel Sambuc     if (Opcode == ARM::tLDMIA)
624*0a6a1f1dSLionel Sambuc       // Update tLDMIA with writeback if necessary.
625*0a6a1f1dSLionel Sambuc       Opcode = ARM::tLDMIA_UPD;
626*0a6a1f1dSLionel Sambuc 
627*0a6a1f1dSLionel Sambuc     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
628*0a6a1f1dSLionel Sambuc 
629*0a6a1f1dSLionel Sambuc     // Thumb1: we might need to set base writeback when building the MI.
630*0a6a1f1dSLionel Sambuc     MIB.addReg(Base, getDefRegState(true))
631*0a6a1f1dSLionel Sambuc        .addReg(Base, getKillRegState(BaseKill));
632*0a6a1f1dSLionel Sambuc 
633*0a6a1f1dSLionel Sambuc     // The base isn't dead after a merged instruction with writeback.
634*0a6a1f1dSLionel Sambuc     // Insert a sub instruction after the newly formed instruction to reset.
635*0a6a1f1dSLionel Sambuc     if (!BaseKill)
636*0a6a1f1dSLionel Sambuc       UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
637*0a6a1f1dSLionel Sambuc 
638*0a6a1f1dSLionel Sambuc   } else {
639*0a6a1f1dSLionel Sambuc     // No writeback, simply build the MachineInstr.
640*0a6a1f1dSLionel Sambuc     MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
641*0a6a1f1dSLionel Sambuc     MIB.addReg(Base, getKillRegState(BaseKill));
642*0a6a1f1dSLionel Sambuc   }
643*0a6a1f1dSLionel Sambuc 
644*0a6a1f1dSLionel Sambuc   MIB.addImm(Pred).addReg(PredReg);
645*0a6a1f1dSLionel Sambuc 
646f4a2713aSLionel Sambuc   for (unsigned i = 0; i != NumRegs; ++i)
647f4a2713aSLionel Sambuc     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
648f4a2713aSLionel Sambuc                      | getKillRegState(Regs[i].second));
649f4a2713aSLionel Sambuc 
650f4a2713aSLionel Sambuc   // Add implicit defs for super-registers.
651f4a2713aSLionel Sambuc   for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
652f4a2713aSLionel Sambuc     MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
653f4a2713aSLionel Sambuc 
654f4a2713aSLionel Sambuc   return true;
655f4a2713aSLionel Sambuc }
656f4a2713aSLionel Sambuc 
657f4a2713aSLionel Sambuc /// \brief Find all instructions using a given imp-def within a range.
658f4a2713aSLionel Sambuc ///
659f4a2713aSLionel Sambuc /// We are trying to combine a range of instructions, one of which (located at
660f4a2713aSLionel Sambuc /// position RangeBegin) implicitly defines a register. The final LDM/STM will
661f4a2713aSLionel Sambuc /// be placed at RangeEnd, and so any uses of this definition between RangeStart
662f4a2713aSLionel Sambuc /// and RangeEnd must be modified to use an undefined value.
663f4a2713aSLionel Sambuc ///
664f4a2713aSLionel Sambuc /// The live range continues until we find a second definition or one of the
665f4a2713aSLionel Sambuc /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
666f4a2713aSLionel Sambuc /// we must consider all uses and decide which are relevant in a second pass.
findUsesOfImpDef(SmallVectorImpl<MachineOperand * > & UsesOfImpDefs,const MemOpQueue & MemOps,unsigned DefReg,unsigned RangeBegin,unsigned RangeEnd)667f4a2713aSLionel Sambuc void ARMLoadStoreOpt::findUsesOfImpDef(
668f4a2713aSLionel Sambuc     SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
669f4a2713aSLionel Sambuc     unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
670f4a2713aSLionel Sambuc   std::map<unsigned, MachineOperand *> Uses;
671f4a2713aSLionel Sambuc   unsigned LastLivePos = RangeEnd;
672f4a2713aSLionel Sambuc 
673f4a2713aSLionel Sambuc   // First we find all uses of this register with Position between RangeBegin
674f4a2713aSLionel Sambuc   // and RangeEnd, any or all of these could be uses of a definition at
675f4a2713aSLionel Sambuc   // RangeBegin. We also record the latest position a definition at RangeBegin
676f4a2713aSLionel Sambuc   // would be considered live.
677f4a2713aSLionel Sambuc   for (unsigned i = 0; i < MemOps.size(); ++i) {
678f4a2713aSLionel Sambuc     MachineInstr &MI = *MemOps[i].MBBI;
679f4a2713aSLionel Sambuc     unsigned MIPosition = MemOps[i].Position;
680f4a2713aSLionel Sambuc     if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
681f4a2713aSLionel Sambuc       continue;
682f4a2713aSLionel Sambuc 
683f4a2713aSLionel Sambuc     // If this instruction defines the register, then any later use will be of
684f4a2713aSLionel Sambuc     // that definition rather than ours.
685f4a2713aSLionel Sambuc     if (MI.definesRegister(DefReg))
686f4a2713aSLionel Sambuc       LastLivePos = std::min(LastLivePos, MIPosition);
687f4a2713aSLionel Sambuc 
688f4a2713aSLionel Sambuc     MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
689f4a2713aSLionel Sambuc     if (!UseOp)
690f4a2713aSLionel Sambuc       continue;
691f4a2713aSLionel Sambuc 
692f4a2713aSLionel Sambuc     // If this instruction kills the register then (assuming liveness is
693f4a2713aSLionel Sambuc     // correct when we start) we don't need to think about anything after here.
694f4a2713aSLionel Sambuc     if (UseOp->isKill())
695f4a2713aSLionel Sambuc       LastLivePos = std::min(LastLivePos, MIPosition);
696f4a2713aSLionel Sambuc 
697f4a2713aSLionel Sambuc     Uses[MIPosition] = UseOp;
698f4a2713aSLionel Sambuc   }
699f4a2713aSLionel Sambuc 
700f4a2713aSLionel Sambuc   // Now we traverse the list of all uses, and append the ones that actually use
701f4a2713aSLionel Sambuc   // our definition to the requested list.
702f4a2713aSLionel Sambuc   for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
703f4a2713aSLionel Sambuc                                                       E = Uses.end();
704f4a2713aSLionel Sambuc        I != E; ++I) {
705f4a2713aSLionel Sambuc     // List is sorted by position so once we've found one out of range there
706f4a2713aSLionel Sambuc     // will be no more to consider.
707f4a2713aSLionel Sambuc     if (I->first > LastLivePos)
708f4a2713aSLionel Sambuc       break;
709f4a2713aSLionel Sambuc     UsesOfImpDefs.push_back(I->second);
710f4a2713aSLionel Sambuc   }
711f4a2713aSLionel Sambuc }
712f4a2713aSLionel Sambuc 
713f4a2713aSLionel Sambuc // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
714f4a2713aSLionel Sambuc // success.
MergeOpsUpdate(MachineBasicBlock & MBB,MemOpQueue & memOps,unsigned memOpsBegin,unsigned memOpsEnd,unsigned insertAfter,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVectorImpl<MachineBasicBlock::iterator> & Merges)715f4a2713aSLionel Sambuc void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
716f4a2713aSLionel Sambuc                                      MemOpQueue &memOps,
717f4a2713aSLionel Sambuc                                      unsigned memOpsBegin, unsigned memOpsEnd,
718f4a2713aSLionel Sambuc                                      unsigned insertAfter, int Offset,
719f4a2713aSLionel Sambuc                                      unsigned Base, bool BaseKill,
720f4a2713aSLionel Sambuc                                      int Opcode,
721f4a2713aSLionel Sambuc                                      ARMCC::CondCodes Pred, unsigned PredReg,
722f4a2713aSLionel Sambuc                                      unsigned Scratch,
723f4a2713aSLionel Sambuc                                      DebugLoc dl,
724f4a2713aSLionel Sambuc                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
725f4a2713aSLionel Sambuc   // First calculate which of the registers should be killed by the merged
726f4a2713aSLionel Sambuc   // instruction.
727f4a2713aSLionel Sambuc   const unsigned insertPos = memOps[insertAfter].Position;
728f4a2713aSLionel Sambuc   SmallSet<unsigned, 4> KilledRegs;
729f4a2713aSLionel Sambuc   DenseMap<unsigned, unsigned> Killer;
730f4a2713aSLionel Sambuc   for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
731f4a2713aSLionel Sambuc     if (i == memOpsBegin) {
732f4a2713aSLionel Sambuc       i = memOpsEnd;
733f4a2713aSLionel Sambuc       if (i == e)
734f4a2713aSLionel Sambuc         break;
735f4a2713aSLionel Sambuc     }
736f4a2713aSLionel Sambuc     if (memOps[i].Position < insertPos && memOps[i].isKill) {
737f4a2713aSLionel Sambuc       unsigned Reg = memOps[i].Reg;
738f4a2713aSLionel Sambuc       KilledRegs.insert(Reg);
739f4a2713aSLionel Sambuc       Killer[Reg] = i;
740f4a2713aSLionel Sambuc     }
741f4a2713aSLionel Sambuc   }
742f4a2713aSLionel Sambuc 
743f4a2713aSLionel Sambuc   SmallVector<std::pair<unsigned, bool>, 8> Regs;
744f4a2713aSLionel Sambuc   SmallVector<unsigned, 8> ImpDefs;
745f4a2713aSLionel Sambuc   SmallVector<MachineOperand *, 8> UsesOfImpDefs;
746f4a2713aSLionel Sambuc   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
747f4a2713aSLionel Sambuc     unsigned Reg = memOps[i].Reg;
748f4a2713aSLionel Sambuc     // If we are inserting the merged operation after an operation that
749f4a2713aSLionel Sambuc     // uses the same register, make sure to transfer any kill flag.
750f4a2713aSLionel Sambuc     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
751f4a2713aSLionel Sambuc     Regs.push_back(std::make_pair(Reg, isKill));
752f4a2713aSLionel Sambuc 
753f4a2713aSLionel Sambuc     // Collect any implicit defs of super-registers. They must be preserved.
754f4a2713aSLionel Sambuc     for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
755f4a2713aSLionel Sambuc       if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
756f4a2713aSLionel Sambuc         continue;
757f4a2713aSLionel Sambuc       unsigned DefReg = MO->getReg();
758f4a2713aSLionel Sambuc       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
759f4a2713aSLionel Sambuc         ImpDefs.push_back(DefReg);
760f4a2713aSLionel Sambuc 
761f4a2713aSLionel Sambuc       // There may be other uses of the definition between this instruction and
762f4a2713aSLionel Sambuc       // the eventual LDM/STM position. These should be marked undef if the
763f4a2713aSLionel Sambuc       // merge takes place.
764f4a2713aSLionel Sambuc       findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
765f4a2713aSLionel Sambuc                        insertPos);
766f4a2713aSLionel Sambuc     }
767f4a2713aSLionel Sambuc   }
768f4a2713aSLionel Sambuc 
769f4a2713aSLionel Sambuc   // Try to do the merge.
770f4a2713aSLionel Sambuc   MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
771f4a2713aSLionel Sambuc   ++Loc;
772f4a2713aSLionel Sambuc   if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
773f4a2713aSLionel Sambuc                 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
774f4a2713aSLionel Sambuc     return;
775f4a2713aSLionel Sambuc 
776f4a2713aSLionel Sambuc   // Merge succeeded, update records.
777*0a6a1f1dSLionel Sambuc   Merges.push_back(std::prev(Loc));
778f4a2713aSLionel Sambuc 
779f4a2713aSLionel Sambuc   // In gathering loads together, we may have moved the imp-def of a register
780f4a2713aSLionel Sambuc   // past one of its uses. This is OK, since we know better than the rest of
781f4a2713aSLionel Sambuc   // LLVM what's OK with ARM loads and stores; but we still have to adjust the
782f4a2713aSLionel Sambuc   // affected uses.
783f4a2713aSLionel Sambuc   for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
784f4a2713aSLionel Sambuc                                                    E = UsesOfImpDefs.end();
785f4a2713aSLionel Sambuc                                                    I != E; ++I)
786f4a2713aSLionel Sambuc     (*I)->setIsUndef();
787f4a2713aSLionel Sambuc 
788f4a2713aSLionel Sambuc   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
789f4a2713aSLionel Sambuc     // Remove kill flags from any memops that come before insertPos.
790f4a2713aSLionel Sambuc     if (Regs[i-memOpsBegin].second) {
791f4a2713aSLionel Sambuc       unsigned Reg = Regs[i-memOpsBegin].first;
792f4a2713aSLionel Sambuc       if (KilledRegs.count(Reg)) {
793f4a2713aSLionel Sambuc         unsigned j = Killer[Reg];
794f4a2713aSLionel Sambuc         int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
795f4a2713aSLionel Sambuc         assert(Idx >= 0 && "Cannot find killing operand");
796f4a2713aSLionel Sambuc         memOps[j].MBBI->getOperand(Idx).setIsKill(false);
797f4a2713aSLionel Sambuc         memOps[j].isKill = false;
798f4a2713aSLionel Sambuc       }
799f4a2713aSLionel Sambuc       memOps[i].isKill = true;
800f4a2713aSLionel Sambuc     }
801f4a2713aSLionel Sambuc     MBB.erase(memOps[i].MBBI);
802f4a2713aSLionel Sambuc     // Update this memop to refer to the merged instruction.
803f4a2713aSLionel Sambuc     // We may need to move kill flags again.
804f4a2713aSLionel Sambuc     memOps[i].Merged = true;
805f4a2713aSLionel Sambuc     memOps[i].MBBI = Merges.back();
806f4a2713aSLionel Sambuc     memOps[i].Position = insertPos;
807f4a2713aSLionel Sambuc   }
808*0a6a1f1dSLionel Sambuc 
809*0a6a1f1dSLionel Sambuc   // Update memOps offsets, since they may have been modified by MergeOps.
810*0a6a1f1dSLionel Sambuc   for (auto &MemOp : memOps) {
811*0a6a1f1dSLionel Sambuc     MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
812*0a6a1f1dSLionel Sambuc   }
813f4a2713aSLionel Sambuc }
814f4a2713aSLionel Sambuc 
815f4a2713aSLionel Sambuc /// MergeLDR_STR - Merge a number of load / store instructions into one or more
816f4a2713aSLionel Sambuc /// load / store multiple instructions.
817f4a2713aSLionel Sambuc void
MergeLDR_STR(MachineBasicBlock & MBB,unsigned SIndex,unsigned Base,int Opcode,unsigned Size,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,MemOpQueue & MemOps,SmallVectorImpl<MachineBasicBlock::iterator> & Merges)818f4a2713aSLionel Sambuc ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
819f4a2713aSLionel Sambuc                          unsigned Base, int Opcode, unsigned Size,
820f4a2713aSLionel Sambuc                          ARMCC::CondCodes Pred, unsigned PredReg,
821f4a2713aSLionel Sambuc                          unsigned Scratch, MemOpQueue &MemOps,
822f4a2713aSLionel Sambuc                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
823f4a2713aSLionel Sambuc   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
824f4a2713aSLionel Sambuc   int Offset = MemOps[SIndex].Offset;
825f4a2713aSLionel Sambuc   int SOffset = Offset;
826f4a2713aSLionel Sambuc   unsigned insertAfter = SIndex;
827f4a2713aSLionel Sambuc   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
828f4a2713aSLionel Sambuc   DebugLoc dl = Loc->getDebugLoc();
829f4a2713aSLionel Sambuc   const MachineOperand &PMO = Loc->getOperand(0);
830f4a2713aSLionel Sambuc   unsigned PReg = PMO.getReg();
831f4a2713aSLionel Sambuc   unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
832f4a2713aSLionel Sambuc   unsigned Count = 1;
833f4a2713aSLionel Sambuc   unsigned Limit = ~0U;
834*0a6a1f1dSLionel Sambuc   bool BaseKill = false;
835f4a2713aSLionel Sambuc   // vldm / vstm limit are 32 for S variants, 16 for D variants.
836f4a2713aSLionel Sambuc 
837f4a2713aSLionel Sambuc   switch (Opcode) {
838f4a2713aSLionel Sambuc   default: break;
839f4a2713aSLionel Sambuc   case ARM::VSTRS:
840f4a2713aSLionel Sambuc     Limit = 32;
841f4a2713aSLionel Sambuc     break;
842f4a2713aSLionel Sambuc   case ARM::VSTRD:
843f4a2713aSLionel Sambuc     Limit = 16;
844f4a2713aSLionel Sambuc     break;
845f4a2713aSLionel Sambuc   case ARM::VLDRD:
846f4a2713aSLionel Sambuc     Limit = 16;
847f4a2713aSLionel Sambuc     break;
848f4a2713aSLionel Sambuc   case ARM::VLDRS:
849f4a2713aSLionel Sambuc     Limit = 32;
850f4a2713aSLionel Sambuc     break;
851f4a2713aSLionel Sambuc   }
852f4a2713aSLionel Sambuc 
853f4a2713aSLionel Sambuc   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
854f4a2713aSLionel Sambuc     int NewOffset = MemOps[i].Offset;
855f4a2713aSLionel Sambuc     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
856f4a2713aSLionel Sambuc     unsigned Reg = MO.getReg();
857f4a2713aSLionel Sambuc     unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
858f4a2713aSLionel Sambuc     // Register numbers must be in ascending order. For VFP / NEON load and
859f4a2713aSLionel Sambuc     // store multiples, the registers must also be consecutive and within the
860f4a2713aSLionel Sambuc     // limit on the number of registers per instruction.
861f4a2713aSLionel Sambuc     if (Reg != ARM::SP &&
862f4a2713aSLionel Sambuc         NewOffset == Offset + (int)Size &&
863f4a2713aSLionel Sambuc         ((isNotVFP && RegNum > PRegNum) ||
864f4a2713aSLionel Sambuc          ((Count < Limit) && RegNum == PRegNum+1)) &&
865f4a2713aSLionel Sambuc         // On Swift we don't want vldm/vstm to start with a odd register num
866f4a2713aSLionel Sambuc         // because Q register unaligned vldm/vstm need more uops.
867f4a2713aSLionel Sambuc         (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
868f4a2713aSLionel Sambuc       Offset += Size;
869f4a2713aSLionel Sambuc       PRegNum = RegNum;
870f4a2713aSLionel Sambuc       ++Count;
871f4a2713aSLionel Sambuc     } else {
872f4a2713aSLionel Sambuc       // Can't merge this in. Try merge the earlier ones first.
873*0a6a1f1dSLionel Sambuc       // We need to compute BaseKill here because the MemOps may have been
874*0a6a1f1dSLionel Sambuc       // reordered.
875*0a6a1f1dSLionel Sambuc       BaseKill = Loc->killsRegister(Base);
876*0a6a1f1dSLionel Sambuc 
877*0a6a1f1dSLionel Sambuc       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
878*0a6a1f1dSLionel Sambuc                      BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
879f4a2713aSLionel Sambuc       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
880f4a2713aSLionel Sambuc                    MemOps, Merges);
881f4a2713aSLionel Sambuc       return;
882f4a2713aSLionel Sambuc     }
883f4a2713aSLionel Sambuc 
884*0a6a1f1dSLionel Sambuc     if (MemOps[i].Position > MemOps[insertAfter].Position) {
885f4a2713aSLionel Sambuc       insertAfter = i;
886*0a6a1f1dSLionel Sambuc       Loc = MemOps[i].MBBI;
887*0a6a1f1dSLionel Sambuc     }
888f4a2713aSLionel Sambuc   }
889f4a2713aSLionel Sambuc 
890*0a6a1f1dSLionel Sambuc   BaseKill =  Loc->killsRegister(Base);
891f4a2713aSLionel Sambuc   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
892f4a2713aSLionel Sambuc                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
893f4a2713aSLionel Sambuc }
894f4a2713aSLionel Sambuc 
isMatchingDecrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)895f4a2713aSLionel Sambuc static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
896f4a2713aSLionel Sambuc                                 unsigned Bytes, unsigned Limit,
897f4a2713aSLionel Sambuc                                 ARMCC::CondCodes Pred, unsigned PredReg) {
898f4a2713aSLionel Sambuc   unsigned MyPredReg = 0;
899f4a2713aSLionel Sambuc   if (!MI)
900f4a2713aSLionel Sambuc     return false;
901f4a2713aSLionel Sambuc 
902f4a2713aSLionel Sambuc   bool CheckCPSRDef = false;
903f4a2713aSLionel Sambuc   switch (MI->getOpcode()) {
904f4a2713aSLionel Sambuc   default: return false;
905*0a6a1f1dSLionel Sambuc   case ARM::tSUBi8:
906f4a2713aSLionel Sambuc   case ARM::t2SUBri:
907f4a2713aSLionel Sambuc   case ARM::SUBri:
908f4a2713aSLionel Sambuc     CheckCPSRDef = true;
909f4a2713aSLionel Sambuc   // fallthrough
910f4a2713aSLionel Sambuc   case ARM::tSUBspi:
911f4a2713aSLionel Sambuc     break;
912f4a2713aSLionel Sambuc   }
913f4a2713aSLionel Sambuc 
914f4a2713aSLionel Sambuc   // Make sure the offset fits in 8 bits.
915f4a2713aSLionel Sambuc   if (Bytes == 0 || (Limit && Bytes >= Limit))
916f4a2713aSLionel Sambuc     return false;
917f4a2713aSLionel Sambuc 
918*0a6a1f1dSLionel Sambuc   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
919*0a6a1f1dSLionel Sambuc                     MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
920f4a2713aSLionel Sambuc   if (!(MI->getOperand(0).getReg() == Base &&
921f4a2713aSLionel Sambuc         MI->getOperand(1).getReg() == Base &&
922f4a2713aSLionel Sambuc         (MI->getOperand(2).getImm() * Scale) == Bytes &&
923f4a2713aSLionel Sambuc         getInstrPredicate(MI, MyPredReg) == Pred &&
924f4a2713aSLionel Sambuc         MyPredReg == PredReg))
925f4a2713aSLionel Sambuc     return false;
926f4a2713aSLionel Sambuc 
927f4a2713aSLionel Sambuc   return CheckCPSRDef ? !definesCPSR(MI) : true;
928f4a2713aSLionel Sambuc }
929f4a2713aSLionel Sambuc 
isMatchingIncrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)930f4a2713aSLionel Sambuc static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
931f4a2713aSLionel Sambuc                                 unsigned Bytes, unsigned Limit,
932f4a2713aSLionel Sambuc                                 ARMCC::CondCodes Pred, unsigned PredReg) {
933f4a2713aSLionel Sambuc   unsigned MyPredReg = 0;
934f4a2713aSLionel Sambuc   if (!MI)
935f4a2713aSLionel Sambuc     return false;
936f4a2713aSLionel Sambuc 
937f4a2713aSLionel Sambuc   bool CheckCPSRDef = false;
938f4a2713aSLionel Sambuc   switch (MI->getOpcode()) {
939f4a2713aSLionel Sambuc   default: return false;
940*0a6a1f1dSLionel Sambuc   case ARM::tADDi8:
941f4a2713aSLionel Sambuc   case ARM::t2ADDri:
942f4a2713aSLionel Sambuc   case ARM::ADDri:
943f4a2713aSLionel Sambuc     CheckCPSRDef = true;
944f4a2713aSLionel Sambuc   // fallthrough
945f4a2713aSLionel Sambuc   case ARM::tADDspi:
946f4a2713aSLionel Sambuc     break;
947f4a2713aSLionel Sambuc   }
948f4a2713aSLionel Sambuc 
949f4a2713aSLionel Sambuc   if (Bytes == 0 || (Limit && Bytes >= Limit))
950f4a2713aSLionel Sambuc     // Make sure the offset fits in 8 bits.
951f4a2713aSLionel Sambuc     return false;
952f4a2713aSLionel Sambuc 
953*0a6a1f1dSLionel Sambuc   unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
954*0a6a1f1dSLionel Sambuc                     MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
955f4a2713aSLionel Sambuc   if (!(MI->getOperand(0).getReg() == Base &&
956f4a2713aSLionel Sambuc         MI->getOperand(1).getReg() == Base &&
957f4a2713aSLionel Sambuc         (MI->getOperand(2).getImm() * Scale) == Bytes &&
958f4a2713aSLionel Sambuc         getInstrPredicate(MI, MyPredReg) == Pred &&
959f4a2713aSLionel Sambuc         MyPredReg == PredReg))
960f4a2713aSLionel Sambuc     return false;
961f4a2713aSLionel Sambuc 
962f4a2713aSLionel Sambuc   return CheckCPSRDef ? !definesCPSR(MI) : true;
963f4a2713aSLionel Sambuc }
964f4a2713aSLionel Sambuc 
getLSMultipleTransferSize(MachineInstr * MI)965f4a2713aSLionel Sambuc static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
966f4a2713aSLionel Sambuc   switch (MI->getOpcode()) {
967f4a2713aSLionel Sambuc   default: return 0;
968f4a2713aSLionel Sambuc   case ARM::LDRi12:
969f4a2713aSLionel Sambuc   case ARM::STRi12:
970*0a6a1f1dSLionel Sambuc   case ARM::tLDRi:
971*0a6a1f1dSLionel Sambuc   case ARM::tSTRi:
972f4a2713aSLionel Sambuc   case ARM::t2LDRi8:
973f4a2713aSLionel Sambuc   case ARM::t2LDRi12:
974f4a2713aSLionel Sambuc   case ARM::t2STRi8:
975f4a2713aSLionel Sambuc   case ARM::t2STRi12:
976f4a2713aSLionel Sambuc   case ARM::VLDRS:
977f4a2713aSLionel Sambuc   case ARM::VSTRS:
978f4a2713aSLionel Sambuc     return 4;
979f4a2713aSLionel Sambuc   case ARM::VLDRD:
980f4a2713aSLionel Sambuc   case ARM::VSTRD:
981f4a2713aSLionel Sambuc     return 8;
982f4a2713aSLionel Sambuc   case ARM::LDMIA:
983f4a2713aSLionel Sambuc   case ARM::LDMDA:
984f4a2713aSLionel Sambuc   case ARM::LDMDB:
985f4a2713aSLionel Sambuc   case ARM::LDMIB:
986f4a2713aSLionel Sambuc   case ARM::STMIA:
987f4a2713aSLionel Sambuc   case ARM::STMDA:
988f4a2713aSLionel Sambuc   case ARM::STMDB:
989f4a2713aSLionel Sambuc   case ARM::STMIB:
990*0a6a1f1dSLionel Sambuc   case ARM::tLDMIA:
991*0a6a1f1dSLionel Sambuc   case ARM::tLDMIA_UPD:
992*0a6a1f1dSLionel Sambuc   case ARM::tSTMIA_UPD:
993f4a2713aSLionel Sambuc   case ARM::t2LDMIA:
994f4a2713aSLionel Sambuc   case ARM::t2LDMDB:
995f4a2713aSLionel Sambuc   case ARM::t2STMIA:
996f4a2713aSLionel Sambuc   case ARM::t2STMDB:
997f4a2713aSLionel Sambuc   case ARM::VLDMSIA:
998f4a2713aSLionel Sambuc   case ARM::VSTMSIA:
999f4a2713aSLionel Sambuc     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1000f4a2713aSLionel Sambuc   case ARM::VLDMDIA:
1001f4a2713aSLionel Sambuc   case ARM::VSTMDIA:
1002f4a2713aSLionel Sambuc     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
1003f4a2713aSLionel Sambuc   }
1004f4a2713aSLionel Sambuc }
1005f4a2713aSLionel Sambuc 
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)1006f4a2713aSLionel Sambuc static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1007f4a2713aSLionel Sambuc                                             ARM_AM::AMSubMode Mode) {
1008f4a2713aSLionel Sambuc   switch (Opc) {
1009f4a2713aSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
1010f4a2713aSLionel Sambuc   case ARM::LDMIA:
1011f4a2713aSLionel Sambuc   case ARM::LDMDA:
1012f4a2713aSLionel Sambuc   case ARM::LDMDB:
1013f4a2713aSLionel Sambuc   case ARM::LDMIB:
1014f4a2713aSLionel Sambuc     switch (Mode) {
1015f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1016f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::LDMIA_UPD;
1017f4a2713aSLionel Sambuc     case ARM_AM::ib: return ARM::LDMIB_UPD;
1018f4a2713aSLionel Sambuc     case ARM_AM::da: return ARM::LDMDA_UPD;
1019f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::LDMDB_UPD;
1020f4a2713aSLionel Sambuc     }
1021f4a2713aSLionel Sambuc   case ARM::STMIA:
1022f4a2713aSLionel Sambuc   case ARM::STMDA:
1023f4a2713aSLionel Sambuc   case ARM::STMDB:
1024f4a2713aSLionel Sambuc   case ARM::STMIB:
1025f4a2713aSLionel Sambuc     switch (Mode) {
1026f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1027f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::STMIA_UPD;
1028f4a2713aSLionel Sambuc     case ARM_AM::ib: return ARM::STMIB_UPD;
1029f4a2713aSLionel Sambuc     case ARM_AM::da: return ARM::STMDA_UPD;
1030f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::STMDB_UPD;
1031f4a2713aSLionel Sambuc     }
1032f4a2713aSLionel Sambuc   case ARM::t2LDMIA:
1033f4a2713aSLionel Sambuc   case ARM::t2LDMDB:
1034f4a2713aSLionel Sambuc     switch (Mode) {
1035f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1036f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1037f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::t2LDMDB_UPD;
1038f4a2713aSLionel Sambuc     }
1039f4a2713aSLionel Sambuc   case ARM::t2STMIA:
1040f4a2713aSLionel Sambuc   case ARM::t2STMDB:
1041f4a2713aSLionel Sambuc     switch (Mode) {
1042f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1043f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::t2STMIA_UPD;
1044f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::t2STMDB_UPD;
1045f4a2713aSLionel Sambuc     }
1046f4a2713aSLionel Sambuc   case ARM::VLDMSIA:
1047f4a2713aSLionel Sambuc     switch (Mode) {
1048f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1049f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1050f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::VLDMSDB_UPD;
1051f4a2713aSLionel Sambuc     }
1052f4a2713aSLionel Sambuc   case ARM::VLDMDIA:
1053f4a2713aSLionel Sambuc     switch (Mode) {
1054f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1055f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1056f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::VLDMDDB_UPD;
1057f4a2713aSLionel Sambuc     }
1058f4a2713aSLionel Sambuc   case ARM::VSTMSIA:
1059f4a2713aSLionel Sambuc     switch (Mode) {
1060f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1061f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1062f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::VSTMSDB_UPD;
1063f4a2713aSLionel Sambuc     }
1064f4a2713aSLionel Sambuc   case ARM::VSTMDIA:
1065f4a2713aSLionel Sambuc     switch (Mode) {
1066f4a2713aSLionel Sambuc     default: llvm_unreachable("Unhandled submode!");
1067f4a2713aSLionel Sambuc     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1068f4a2713aSLionel Sambuc     case ARM_AM::db: return ARM::VSTMDDB_UPD;
1069f4a2713aSLionel Sambuc     }
1070f4a2713aSLionel Sambuc   }
1071f4a2713aSLionel Sambuc }
1072f4a2713aSLionel Sambuc 
1073f4a2713aSLionel Sambuc /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
1074f4a2713aSLionel Sambuc /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1075f4a2713aSLionel Sambuc ///
1076f4a2713aSLionel Sambuc /// stmia rn, <ra, rb, rc>
1077f4a2713aSLionel Sambuc /// rn := rn + 4 * 3;
1078f4a2713aSLionel Sambuc /// =>
1079f4a2713aSLionel Sambuc /// stmia rn!, <ra, rb, rc>
1080f4a2713aSLionel Sambuc ///
1081f4a2713aSLionel Sambuc /// rn := rn - 4 * 3;
1082f4a2713aSLionel Sambuc /// ldmia rn, <ra, rb, rc>
1083f4a2713aSLionel Sambuc /// =>
1084f4a2713aSLionel Sambuc /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool & Advance,MachineBasicBlock::iterator & I)1085f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1086f4a2713aSLionel Sambuc                                                MachineBasicBlock::iterator MBBI,
1087f4a2713aSLionel Sambuc                                                bool &Advance,
1088f4a2713aSLionel Sambuc                                                MachineBasicBlock::iterator &I) {
1089*0a6a1f1dSLionel Sambuc   // Thumb1 is already using updating loads/stores.
1090*0a6a1f1dSLionel Sambuc   if (isThumb1) return false;
1091*0a6a1f1dSLionel Sambuc 
1092f4a2713aSLionel Sambuc   MachineInstr *MI = MBBI;
1093f4a2713aSLionel Sambuc   unsigned Base = MI->getOperand(0).getReg();
1094f4a2713aSLionel Sambuc   bool BaseKill = MI->getOperand(0).isKill();
1095f4a2713aSLionel Sambuc   unsigned Bytes = getLSMultipleTransferSize(MI);
1096f4a2713aSLionel Sambuc   unsigned PredReg = 0;
1097f4a2713aSLionel Sambuc   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1098f4a2713aSLionel Sambuc   int Opcode = MI->getOpcode();
1099f4a2713aSLionel Sambuc   DebugLoc dl = MI->getDebugLoc();
1100f4a2713aSLionel Sambuc 
1101f4a2713aSLionel Sambuc   // Can't use an updating ld/st if the base register is also a dest
1102f4a2713aSLionel Sambuc   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1103f4a2713aSLionel Sambuc   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1104f4a2713aSLionel Sambuc     if (MI->getOperand(i).getReg() == Base)
1105f4a2713aSLionel Sambuc       return false;
1106f4a2713aSLionel Sambuc 
1107f4a2713aSLionel Sambuc   bool DoMerge = false;
1108f4a2713aSLionel Sambuc   ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
1109f4a2713aSLionel Sambuc 
1110f4a2713aSLionel Sambuc   // Try merging with the previous instruction.
1111f4a2713aSLionel Sambuc   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1112f4a2713aSLionel Sambuc   if (MBBI != BeginMBBI) {
1113*0a6a1f1dSLionel Sambuc     MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1114f4a2713aSLionel Sambuc     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1115f4a2713aSLionel Sambuc       --PrevMBBI;
1116f4a2713aSLionel Sambuc     if (Mode == ARM_AM::ia &&
1117f4a2713aSLionel Sambuc         isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1118f4a2713aSLionel Sambuc       Mode = ARM_AM::db;
1119f4a2713aSLionel Sambuc       DoMerge = true;
1120f4a2713aSLionel Sambuc     } else if (Mode == ARM_AM::ib &&
1121f4a2713aSLionel Sambuc                isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1122f4a2713aSLionel Sambuc       Mode = ARM_AM::da;
1123f4a2713aSLionel Sambuc       DoMerge = true;
1124f4a2713aSLionel Sambuc     }
1125f4a2713aSLionel Sambuc     if (DoMerge)
1126f4a2713aSLionel Sambuc       MBB.erase(PrevMBBI);
1127f4a2713aSLionel Sambuc   }
1128f4a2713aSLionel Sambuc 
1129f4a2713aSLionel Sambuc   // Try merging with the next instruction.
1130f4a2713aSLionel Sambuc   MachineBasicBlock::iterator EndMBBI = MBB.end();
1131f4a2713aSLionel Sambuc   if (!DoMerge && MBBI != EndMBBI) {
1132*0a6a1f1dSLionel Sambuc     MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1133f4a2713aSLionel Sambuc     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1134f4a2713aSLionel Sambuc       ++NextMBBI;
1135f4a2713aSLionel Sambuc     if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1136f4a2713aSLionel Sambuc         isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1137f4a2713aSLionel Sambuc       DoMerge = true;
1138f4a2713aSLionel Sambuc     } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1139f4a2713aSLionel Sambuc                isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1140f4a2713aSLionel Sambuc       DoMerge = true;
1141f4a2713aSLionel Sambuc     }
1142f4a2713aSLionel Sambuc     if (DoMerge) {
1143f4a2713aSLionel Sambuc       if (NextMBBI == I) {
1144f4a2713aSLionel Sambuc         Advance = true;
1145f4a2713aSLionel Sambuc         ++I;
1146f4a2713aSLionel Sambuc       }
1147f4a2713aSLionel Sambuc       MBB.erase(NextMBBI);
1148f4a2713aSLionel Sambuc     }
1149f4a2713aSLionel Sambuc   }
1150f4a2713aSLionel Sambuc 
1151f4a2713aSLionel Sambuc   if (!DoMerge)
1152f4a2713aSLionel Sambuc     return false;
1153f4a2713aSLionel Sambuc 
1154f4a2713aSLionel Sambuc   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1155f4a2713aSLionel Sambuc   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1156f4a2713aSLionel Sambuc     .addReg(Base, getDefRegState(true)) // WB base register
1157f4a2713aSLionel Sambuc     .addReg(Base, getKillRegState(BaseKill))
1158f4a2713aSLionel Sambuc     .addImm(Pred).addReg(PredReg);
1159f4a2713aSLionel Sambuc 
1160f4a2713aSLionel Sambuc   // Transfer the rest of operands.
1161f4a2713aSLionel Sambuc   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1162f4a2713aSLionel Sambuc     MIB.addOperand(MI->getOperand(OpNum));
1163f4a2713aSLionel Sambuc 
1164f4a2713aSLionel Sambuc   // Transfer memoperands.
1165f4a2713aSLionel Sambuc   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1166f4a2713aSLionel Sambuc 
1167f4a2713aSLionel Sambuc   MBB.erase(MBBI);
1168f4a2713aSLionel Sambuc   return true;
1169f4a2713aSLionel Sambuc }
1170f4a2713aSLionel Sambuc 
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)1171f4a2713aSLionel Sambuc static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1172f4a2713aSLionel Sambuc                                              ARM_AM::AddrOpc Mode) {
1173f4a2713aSLionel Sambuc   switch (Opc) {
1174f4a2713aSLionel Sambuc   case ARM::LDRi12:
1175f4a2713aSLionel Sambuc     return ARM::LDR_PRE_IMM;
1176f4a2713aSLionel Sambuc   case ARM::STRi12:
1177f4a2713aSLionel Sambuc     return ARM::STR_PRE_IMM;
1178f4a2713aSLionel Sambuc   case ARM::VLDRS:
1179f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1180f4a2713aSLionel Sambuc   case ARM::VLDRD:
1181f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1182f4a2713aSLionel Sambuc   case ARM::VSTRS:
1183f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1184f4a2713aSLionel Sambuc   case ARM::VSTRD:
1185f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1186f4a2713aSLionel Sambuc   case ARM::t2LDRi8:
1187f4a2713aSLionel Sambuc   case ARM::t2LDRi12:
1188f4a2713aSLionel Sambuc     return ARM::t2LDR_PRE;
1189f4a2713aSLionel Sambuc   case ARM::t2STRi8:
1190f4a2713aSLionel Sambuc   case ARM::t2STRi12:
1191f4a2713aSLionel Sambuc     return ARM::t2STR_PRE;
1192f4a2713aSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
1193f4a2713aSLionel Sambuc   }
1194f4a2713aSLionel Sambuc }
1195f4a2713aSLionel Sambuc 
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)1196f4a2713aSLionel Sambuc static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1197f4a2713aSLionel Sambuc                                               ARM_AM::AddrOpc Mode) {
1198f4a2713aSLionel Sambuc   switch (Opc) {
1199f4a2713aSLionel Sambuc   case ARM::LDRi12:
1200f4a2713aSLionel Sambuc     return ARM::LDR_POST_IMM;
1201f4a2713aSLionel Sambuc   case ARM::STRi12:
1202f4a2713aSLionel Sambuc     return ARM::STR_POST_IMM;
1203f4a2713aSLionel Sambuc   case ARM::VLDRS:
1204f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1205f4a2713aSLionel Sambuc   case ARM::VLDRD:
1206f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1207f4a2713aSLionel Sambuc   case ARM::VSTRS:
1208f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1209f4a2713aSLionel Sambuc   case ARM::VSTRD:
1210f4a2713aSLionel Sambuc     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1211f4a2713aSLionel Sambuc   case ARM::t2LDRi8:
1212f4a2713aSLionel Sambuc   case ARM::t2LDRi12:
1213f4a2713aSLionel Sambuc     return ARM::t2LDR_POST;
1214f4a2713aSLionel Sambuc   case ARM::t2STRi8:
1215f4a2713aSLionel Sambuc   case ARM::t2STRi12:
1216f4a2713aSLionel Sambuc     return ARM::t2STR_POST;
1217f4a2713aSLionel Sambuc   default: llvm_unreachable("Unhandled opcode!");
1218f4a2713aSLionel Sambuc   }
1219f4a2713aSLionel Sambuc }
1220f4a2713aSLionel Sambuc 
1221f4a2713aSLionel Sambuc /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
1222f4a2713aSLionel Sambuc /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const TargetInstrInfo * TII,bool & Advance,MachineBasicBlock::iterator & I)1223f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1224f4a2713aSLionel Sambuc                                                MachineBasicBlock::iterator MBBI,
1225f4a2713aSLionel Sambuc                                                const TargetInstrInfo *TII,
1226f4a2713aSLionel Sambuc                                                bool &Advance,
1227f4a2713aSLionel Sambuc                                                MachineBasicBlock::iterator &I) {
1228*0a6a1f1dSLionel Sambuc   // Thumb1 doesn't have updating LDR/STR.
1229*0a6a1f1dSLionel Sambuc   // FIXME: Use LDM/STM with single register instead.
1230*0a6a1f1dSLionel Sambuc   if (isThumb1) return false;
1231*0a6a1f1dSLionel Sambuc 
1232f4a2713aSLionel Sambuc   MachineInstr *MI = MBBI;
1233f4a2713aSLionel Sambuc   unsigned Base = MI->getOperand(1).getReg();
1234f4a2713aSLionel Sambuc   bool BaseKill = MI->getOperand(1).isKill();
1235f4a2713aSLionel Sambuc   unsigned Bytes = getLSMultipleTransferSize(MI);
1236f4a2713aSLionel Sambuc   int Opcode = MI->getOpcode();
1237f4a2713aSLionel Sambuc   DebugLoc dl = MI->getDebugLoc();
1238f4a2713aSLionel Sambuc   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1239f4a2713aSLionel Sambuc                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1240f4a2713aSLionel Sambuc   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1241f4a2713aSLionel Sambuc   if (isi32Load(Opcode) || isi32Store(Opcode))
1242f4a2713aSLionel Sambuc     if (MI->getOperand(2).getImm() != 0)
1243f4a2713aSLionel Sambuc       return false;
1244f4a2713aSLionel Sambuc   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1245f4a2713aSLionel Sambuc     return false;
1246f4a2713aSLionel Sambuc 
1247f4a2713aSLionel Sambuc   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1248f4a2713aSLionel Sambuc   // Can't do the merge if the destination register is the same as the would-be
1249f4a2713aSLionel Sambuc   // writeback register.
1250f4a2713aSLionel Sambuc   if (MI->getOperand(0).getReg() == Base)
1251f4a2713aSLionel Sambuc     return false;
1252f4a2713aSLionel Sambuc 
1253f4a2713aSLionel Sambuc   unsigned PredReg = 0;
1254f4a2713aSLionel Sambuc   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1255f4a2713aSLionel Sambuc   bool DoMerge = false;
1256f4a2713aSLionel Sambuc   ARM_AM::AddrOpc AddSub = ARM_AM::add;
1257f4a2713aSLionel Sambuc   unsigned NewOpc = 0;
1258f4a2713aSLionel Sambuc   // AM2 - 12 bits, thumb2 - 8 bits.
1259f4a2713aSLionel Sambuc   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1260f4a2713aSLionel Sambuc 
1261f4a2713aSLionel Sambuc   // Try merging with the previous instruction.
1262f4a2713aSLionel Sambuc   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1263f4a2713aSLionel Sambuc   if (MBBI != BeginMBBI) {
1264*0a6a1f1dSLionel Sambuc     MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1265f4a2713aSLionel Sambuc     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1266f4a2713aSLionel Sambuc       --PrevMBBI;
1267f4a2713aSLionel Sambuc     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1268f4a2713aSLionel Sambuc       DoMerge = true;
1269f4a2713aSLionel Sambuc       AddSub = ARM_AM::sub;
1270f4a2713aSLionel Sambuc     } else if (!isAM5 &&
1271f4a2713aSLionel Sambuc                isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1272f4a2713aSLionel Sambuc       DoMerge = true;
1273f4a2713aSLionel Sambuc     }
1274f4a2713aSLionel Sambuc     if (DoMerge) {
1275f4a2713aSLionel Sambuc       NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1276f4a2713aSLionel Sambuc       MBB.erase(PrevMBBI);
1277f4a2713aSLionel Sambuc     }
1278f4a2713aSLionel Sambuc   }
1279f4a2713aSLionel Sambuc 
1280f4a2713aSLionel Sambuc   // Try merging with the next instruction.
1281f4a2713aSLionel Sambuc   MachineBasicBlock::iterator EndMBBI = MBB.end();
1282f4a2713aSLionel Sambuc   if (!DoMerge && MBBI != EndMBBI) {
1283*0a6a1f1dSLionel Sambuc     MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1284f4a2713aSLionel Sambuc     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1285f4a2713aSLionel Sambuc       ++NextMBBI;
1286f4a2713aSLionel Sambuc     if (!isAM5 &&
1287f4a2713aSLionel Sambuc         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1288f4a2713aSLionel Sambuc       DoMerge = true;
1289f4a2713aSLionel Sambuc       AddSub = ARM_AM::sub;
1290f4a2713aSLionel Sambuc     } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1291f4a2713aSLionel Sambuc       DoMerge = true;
1292f4a2713aSLionel Sambuc     }
1293f4a2713aSLionel Sambuc     if (DoMerge) {
1294f4a2713aSLionel Sambuc       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1295f4a2713aSLionel Sambuc       if (NextMBBI == I) {
1296f4a2713aSLionel Sambuc         Advance = true;
1297f4a2713aSLionel Sambuc         ++I;
1298f4a2713aSLionel Sambuc       }
1299f4a2713aSLionel Sambuc       MBB.erase(NextMBBI);
1300f4a2713aSLionel Sambuc     }
1301f4a2713aSLionel Sambuc   }
1302f4a2713aSLionel Sambuc 
1303f4a2713aSLionel Sambuc   if (!DoMerge)
1304f4a2713aSLionel Sambuc     return false;
1305f4a2713aSLionel Sambuc 
1306f4a2713aSLionel Sambuc   if (isAM5) {
1307*0a6a1f1dSLionel Sambuc     // VLDM[SD]_UPD, VSTM[SD]_UPD
1308f4a2713aSLionel Sambuc     // (There are no base-updating versions of VLDR/VSTR instructions, but the
1309f4a2713aSLionel Sambuc     // updating load/store-multiple instructions can be used with only one
1310f4a2713aSLionel Sambuc     // register.)
1311f4a2713aSLionel Sambuc     MachineOperand &MO = MI->getOperand(0);
1312f4a2713aSLionel Sambuc     BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1313f4a2713aSLionel Sambuc       .addReg(Base, getDefRegState(true)) // WB base register
1314f4a2713aSLionel Sambuc       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1315f4a2713aSLionel Sambuc       .addImm(Pred).addReg(PredReg)
1316f4a2713aSLionel Sambuc       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1317f4a2713aSLionel Sambuc                             getKillRegState(MO.isKill())));
1318f4a2713aSLionel Sambuc   } else if (isLd) {
1319f4a2713aSLionel Sambuc     if (isAM2) {
1320f4a2713aSLionel Sambuc       // LDR_PRE, LDR_POST
1321f4a2713aSLionel Sambuc       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1322f4a2713aSLionel Sambuc         int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1323f4a2713aSLionel Sambuc         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1324f4a2713aSLionel Sambuc           .addReg(Base, RegState::Define)
1325f4a2713aSLionel Sambuc           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1326f4a2713aSLionel Sambuc       } else {
1327f4a2713aSLionel Sambuc         int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1328f4a2713aSLionel Sambuc         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1329f4a2713aSLionel Sambuc           .addReg(Base, RegState::Define)
1330f4a2713aSLionel Sambuc           .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1331f4a2713aSLionel Sambuc       }
1332f4a2713aSLionel Sambuc     } else {
1333f4a2713aSLionel Sambuc       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1334f4a2713aSLionel Sambuc       // t2LDR_PRE, t2LDR_POST
1335f4a2713aSLionel Sambuc       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1336f4a2713aSLionel Sambuc         .addReg(Base, RegState::Define)
1337f4a2713aSLionel Sambuc         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1338f4a2713aSLionel Sambuc     }
1339f4a2713aSLionel Sambuc   } else {
1340f4a2713aSLionel Sambuc     MachineOperand &MO = MI->getOperand(0);
1341f4a2713aSLionel Sambuc     // FIXME: post-indexed stores use am2offset_imm, which still encodes
1342f4a2713aSLionel Sambuc     // the vestigal zero-reg offset register. When that's fixed, this clause
1343f4a2713aSLionel Sambuc     // can be removed entirely.
1344f4a2713aSLionel Sambuc     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1345f4a2713aSLionel Sambuc       int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1346f4a2713aSLionel Sambuc       // STR_PRE, STR_POST
1347f4a2713aSLionel Sambuc       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1348f4a2713aSLionel Sambuc         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1349f4a2713aSLionel Sambuc         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1350f4a2713aSLionel Sambuc     } else {
1351f4a2713aSLionel Sambuc       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1352f4a2713aSLionel Sambuc       // t2STR_PRE, t2STR_POST
1353f4a2713aSLionel Sambuc       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1354f4a2713aSLionel Sambuc         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1355f4a2713aSLionel Sambuc         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1356f4a2713aSLionel Sambuc     }
1357f4a2713aSLionel Sambuc   }
1358f4a2713aSLionel Sambuc   MBB.erase(MBBI);
1359f4a2713aSLionel Sambuc 
1360f4a2713aSLionel Sambuc   return true;
1361f4a2713aSLionel Sambuc }
1362f4a2713aSLionel Sambuc 
1363f4a2713aSLionel Sambuc /// isMemoryOp - Returns true if instruction is a memory operation that this
1364f4a2713aSLionel Sambuc /// pass is capable of operating on.
isMemoryOp(const MachineInstr * MI)1365f4a2713aSLionel Sambuc static bool isMemoryOp(const MachineInstr *MI) {
1366f4a2713aSLionel Sambuc   // When no memory operands are present, conservatively assume unaligned,
1367f4a2713aSLionel Sambuc   // volatile, unfoldable.
1368f4a2713aSLionel Sambuc   if (!MI->hasOneMemOperand())
1369f4a2713aSLionel Sambuc     return false;
1370f4a2713aSLionel Sambuc 
1371f4a2713aSLionel Sambuc   const MachineMemOperand *MMO = *MI->memoperands_begin();
1372f4a2713aSLionel Sambuc 
1373f4a2713aSLionel Sambuc   // Don't touch volatile memory accesses - we may be changing their order.
1374f4a2713aSLionel Sambuc   if (MMO->isVolatile())
1375f4a2713aSLionel Sambuc     return false;
1376f4a2713aSLionel Sambuc 
1377f4a2713aSLionel Sambuc   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1378f4a2713aSLionel Sambuc   // not.
1379f4a2713aSLionel Sambuc   if (MMO->getAlignment() < 4)
1380f4a2713aSLionel Sambuc     return false;
1381f4a2713aSLionel Sambuc 
1382f4a2713aSLionel Sambuc   // str <undef> could probably be eliminated entirely, but for now we just want
1383f4a2713aSLionel Sambuc   // to avoid making a mess of it.
1384f4a2713aSLionel Sambuc   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1385f4a2713aSLionel Sambuc   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1386f4a2713aSLionel Sambuc       MI->getOperand(0).isUndef())
1387f4a2713aSLionel Sambuc     return false;
1388f4a2713aSLionel Sambuc 
1389f4a2713aSLionel Sambuc   // Likewise don't mess with references to undefined addresses.
1390f4a2713aSLionel Sambuc   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1391f4a2713aSLionel Sambuc       MI->getOperand(1).isUndef())
1392f4a2713aSLionel Sambuc     return false;
1393f4a2713aSLionel Sambuc 
1394f4a2713aSLionel Sambuc   int Opcode = MI->getOpcode();
1395f4a2713aSLionel Sambuc   switch (Opcode) {
1396f4a2713aSLionel Sambuc   default: break;
1397f4a2713aSLionel Sambuc   case ARM::VLDRS:
1398f4a2713aSLionel Sambuc   case ARM::VSTRS:
1399f4a2713aSLionel Sambuc     return MI->getOperand(1).isReg();
1400f4a2713aSLionel Sambuc   case ARM::VLDRD:
1401f4a2713aSLionel Sambuc   case ARM::VSTRD:
1402f4a2713aSLionel Sambuc     return MI->getOperand(1).isReg();
1403f4a2713aSLionel Sambuc   case ARM::LDRi12:
1404f4a2713aSLionel Sambuc   case ARM::STRi12:
1405*0a6a1f1dSLionel Sambuc   case ARM::tLDRi:
1406*0a6a1f1dSLionel Sambuc   case ARM::tSTRi:
1407f4a2713aSLionel Sambuc   case ARM::t2LDRi8:
1408f4a2713aSLionel Sambuc   case ARM::t2LDRi12:
1409f4a2713aSLionel Sambuc   case ARM::t2STRi8:
1410f4a2713aSLionel Sambuc   case ARM::t2STRi12:
1411f4a2713aSLionel Sambuc     return MI->getOperand(1).isReg();
1412f4a2713aSLionel Sambuc   }
1413f4a2713aSLionel Sambuc   return false;
1414f4a2713aSLionel Sambuc }
1415f4a2713aSLionel Sambuc 
1416f4a2713aSLionel Sambuc /// AdvanceRS - Advance register scavenger to just before the earliest memory
1417f4a2713aSLionel Sambuc /// op that is being merged.
AdvanceRS(MachineBasicBlock & MBB,MemOpQueue & MemOps)1418f4a2713aSLionel Sambuc void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1419f4a2713aSLionel Sambuc   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1420f4a2713aSLionel Sambuc   unsigned Position = MemOps[0].Position;
1421f4a2713aSLionel Sambuc   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1422f4a2713aSLionel Sambuc     if (MemOps[i].Position < Position) {
1423f4a2713aSLionel Sambuc       Position = MemOps[i].Position;
1424f4a2713aSLionel Sambuc       Loc = MemOps[i].MBBI;
1425f4a2713aSLionel Sambuc     }
1426f4a2713aSLionel Sambuc   }
1427f4a2713aSLionel Sambuc 
1428f4a2713aSLionel Sambuc   if (Loc != MBB.begin())
1429*0a6a1f1dSLionel Sambuc     RS->forward(std::prev(Loc));
1430f4a2713aSLionel Sambuc }
1431f4a2713aSLionel Sambuc 
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,DebugLoc dl,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,bool OffKill,bool OffUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII,bool isT2)1432f4a2713aSLionel Sambuc static void InsertLDR_STR(MachineBasicBlock &MBB,
1433f4a2713aSLionel Sambuc                           MachineBasicBlock::iterator &MBBI,
1434f4a2713aSLionel Sambuc                           int Offset, bool isDef,
1435f4a2713aSLionel Sambuc                           DebugLoc dl, unsigned NewOpc,
1436f4a2713aSLionel Sambuc                           unsigned Reg, bool RegDeadKill, bool RegUndef,
1437f4a2713aSLionel Sambuc                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
1438f4a2713aSLionel Sambuc                           bool OffKill, bool OffUndef,
1439f4a2713aSLionel Sambuc                           ARMCC::CondCodes Pred, unsigned PredReg,
1440f4a2713aSLionel Sambuc                           const TargetInstrInfo *TII, bool isT2) {
1441f4a2713aSLionel Sambuc   if (isDef) {
1442f4a2713aSLionel Sambuc     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1443f4a2713aSLionel Sambuc                                       TII->get(NewOpc))
1444f4a2713aSLionel Sambuc       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1445f4a2713aSLionel Sambuc       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1446f4a2713aSLionel Sambuc     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1447f4a2713aSLionel Sambuc   } else {
1448f4a2713aSLionel Sambuc     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1449f4a2713aSLionel Sambuc                                       TII->get(NewOpc))
1450f4a2713aSLionel Sambuc       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1451f4a2713aSLionel Sambuc       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1452f4a2713aSLionel Sambuc     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1453f4a2713aSLionel Sambuc   }
1454f4a2713aSLionel Sambuc }
1455f4a2713aSLionel Sambuc 
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1456f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1457f4a2713aSLionel Sambuc                                           MachineBasicBlock::iterator &MBBI) {
1458f4a2713aSLionel Sambuc   MachineInstr *MI = &*MBBI;
1459f4a2713aSLionel Sambuc   unsigned Opcode = MI->getOpcode();
1460f4a2713aSLionel Sambuc   if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1461f4a2713aSLionel Sambuc       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1462f4a2713aSLionel Sambuc     const MachineOperand &BaseOp = MI->getOperand(2);
1463f4a2713aSLionel Sambuc     unsigned BaseReg = BaseOp.getReg();
1464f4a2713aSLionel Sambuc     unsigned EvenReg = MI->getOperand(0).getReg();
1465f4a2713aSLionel Sambuc     unsigned OddReg  = MI->getOperand(1).getReg();
1466f4a2713aSLionel Sambuc     unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1467f4a2713aSLionel Sambuc     unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1468f4a2713aSLionel Sambuc     // ARM errata 602117: LDRD with base in list may result in incorrect base
1469f4a2713aSLionel Sambuc     // register when interrupted or faulted.
1470f4a2713aSLionel Sambuc     bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1471f4a2713aSLionel Sambuc     if (!Errata602117 &&
1472f4a2713aSLionel Sambuc         ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1473f4a2713aSLionel Sambuc       return false;
1474f4a2713aSLionel Sambuc 
1475f4a2713aSLionel Sambuc     MachineBasicBlock::iterator NewBBI = MBBI;
1476f4a2713aSLionel Sambuc     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1477f4a2713aSLionel Sambuc     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1478f4a2713aSLionel Sambuc     bool EvenDeadKill = isLd ?
1479f4a2713aSLionel Sambuc       MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1480f4a2713aSLionel Sambuc     bool EvenUndef = MI->getOperand(0).isUndef();
1481f4a2713aSLionel Sambuc     bool OddDeadKill  = isLd ?
1482f4a2713aSLionel Sambuc       MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1483f4a2713aSLionel Sambuc     bool OddUndef = MI->getOperand(1).isUndef();
1484f4a2713aSLionel Sambuc     bool BaseKill = BaseOp.isKill();
1485f4a2713aSLionel Sambuc     bool BaseUndef = BaseOp.isUndef();
1486f4a2713aSLionel Sambuc     bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1487f4a2713aSLionel Sambuc     bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1488f4a2713aSLionel Sambuc     int OffImm = getMemoryOpOffset(MI);
1489f4a2713aSLionel Sambuc     unsigned PredReg = 0;
1490f4a2713aSLionel Sambuc     ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1491f4a2713aSLionel Sambuc 
1492f4a2713aSLionel Sambuc     if (OddRegNum > EvenRegNum && OffImm == 0) {
1493f4a2713aSLionel Sambuc       // Ascending register numbers and no offset. It's safe to change it to a
1494f4a2713aSLionel Sambuc       // ldm or stm.
1495f4a2713aSLionel Sambuc       unsigned NewOpc = (isLd)
1496f4a2713aSLionel Sambuc         ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1497f4a2713aSLionel Sambuc         : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1498f4a2713aSLionel Sambuc       if (isLd) {
1499f4a2713aSLionel Sambuc         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1500f4a2713aSLionel Sambuc           .addReg(BaseReg, getKillRegState(BaseKill))
1501f4a2713aSLionel Sambuc           .addImm(Pred).addReg(PredReg)
1502f4a2713aSLionel Sambuc           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1503f4a2713aSLionel Sambuc           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1504f4a2713aSLionel Sambuc         ++NumLDRD2LDM;
1505f4a2713aSLionel Sambuc       } else {
1506f4a2713aSLionel Sambuc         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1507f4a2713aSLionel Sambuc           .addReg(BaseReg, getKillRegState(BaseKill))
1508f4a2713aSLionel Sambuc           .addImm(Pred).addReg(PredReg)
1509f4a2713aSLionel Sambuc           .addReg(EvenReg,
1510f4a2713aSLionel Sambuc                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1511f4a2713aSLionel Sambuc           .addReg(OddReg,
1512f4a2713aSLionel Sambuc                   getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1513f4a2713aSLionel Sambuc         ++NumSTRD2STM;
1514f4a2713aSLionel Sambuc       }
1515*0a6a1f1dSLionel Sambuc       NewBBI = std::prev(MBBI);
1516f4a2713aSLionel Sambuc     } else {
1517f4a2713aSLionel Sambuc       // Split into two instructions.
1518f4a2713aSLionel Sambuc       unsigned NewOpc = (isLd)
1519f4a2713aSLionel Sambuc         ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1520f4a2713aSLionel Sambuc         : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1521f4a2713aSLionel Sambuc       // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1522f4a2713aSLionel Sambuc       // so adjust and use t2LDRi12 here for that.
1523f4a2713aSLionel Sambuc       unsigned NewOpc2 = (isLd)
1524f4a2713aSLionel Sambuc         ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1525f4a2713aSLionel Sambuc         : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1526f4a2713aSLionel Sambuc       DebugLoc dl = MBBI->getDebugLoc();
1527f4a2713aSLionel Sambuc       // If this is a load and base register is killed, it may have been
1528f4a2713aSLionel Sambuc       // re-defed by the load, make sure the first load does not clobber it.
1529f4a2713aSLionel Sambuc       if (isLd &&
1530f4a2713aSLionel Sambuc           (BaseKill || OffKill) &&
1531f4a2713aSLionel Sambuc           (TRI->regsOverlap(EvenReg, BaseReg))) {
1532f4a2713aSLionel Sambuc         assert(!TRI->regsOverlap(OddReg, BaseReg));
1533f4a2713aSLionel Sambuc         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1534f4a2713aSLionel Sambuc                       OddReg, OddDeadKill, false,
1535f4a2713aSLionel Sambuc                       BaseReg, false, BaseUndef, false, OffUndef,
1536f4a2713aSLionel Sambuc                       Pred, PredReg, TII, isT2);
1537*0a6a1f1dSLionel Sambuc         NewBBI = std::prev(MBBI);
1538f4a2713aSLionel Sambuc         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1539f4a2713aSLionel Sambuc                       EvenReg, EvenDeadKill, false,
1540f4a2713aSLionel Sambuc                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1541f4a2713aSLionel Sambuc                       Pred, PredReg, TII, isT2);
1542f4a2713aSLionel Sambuc       } else {
1543f4a2713aSLionel Sambuc         if (OddReg == EvenReg && EvenDeadKill) {
1544f4a2713aSLionel Sambuc           // If the two source operands are the same, the kill marker is
1545f4a2713aSLionel Sambuc           // probably on the first one. e.g.
1546f4a2713aSLionel Sambuc           // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1547f4a2713aSLionel Sambuc           EvenDeadKill = false;
1548f4a2713aSLionel Sambuc           OddDeadKill = true;
1549f4a2713aSLionel Sambuc         }
1550f4a2713aSLionel Sambuc         // Never kill the base register in the first instruction.
1551f4a2713aSLionel Sambuc         if (EvenReg == BaseReg)
1552f4a2713aSLionel Sambuc           EvenDeadKill = false;
1553f4a2713aSLionel Sambuc         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1554f4a2713aSLionel Sambuc                       EvenReg, EvenDeadKill, EvenUndef,
1555f4a2713aSLionel Sambuc                       BaseReg, false, BaseUndef, false, OffUndef,
1556f4a2713aSLionel Sambuc                       Pred, PredReg, TII, isT2);
1557*0a6a1f1dSLionel Sambuc         NewBBI = std::prev(MBBI);
1558f4a2713aSLionel Sambuc         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1559f4a2713aSLionel Sambuc                       OddReg, OddDeadKill, OddUndef,
1560f4a2713aSLionel Sambuc                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1561f4a2713aSLionel Sambuc                       Pred, PredReg, TII, isT2);
1562f4a2713aSLionel Sambuc       }
1563f4a2713aSLionel Sambuc       if (isLd)
1564f4a2713aSLionel Sambuc         ++NumLDRD2LDR;
1565f4a2713aSLionel Sambuc       else
1566f4a2713aSLionel Sambuc         ++NumSTRD2STR;
1567f4a2713aSLionel Sambuc     }
1568f4a2713aSLionel Sambuc 
1569f4a2713aSLionel Sambuc     MBB.erase(MI);
1570f4a2713aSLionel Sambuc     MBBI = NewBBI;
1571f4a2713aSLionel Sambuc     return true;
1572f4a2713aSLionel Sambuc   }
1573f4a2713aSLionel Sambuc   return false;
1574f4a2713aSLionel Sambuc }
1575f4a2713aSLionel Sambuc 
1576f4a2713aSLionel Sambuc /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1577f4a2713aSLionel Sambuc /// ops of the same base and incrementing offset into LDM / STM ops.
///
/// Makes one forward walk over MBB. Memory ops that agree on opcode, base
/// register and predicate are collected in MemOps, which is kept ordered by
/// ascending offset. Whenever the chain is broken (or the end of the block is
/// reached) the queued ops are handed to MergeLDR_STR and then to the
/// base-update folding helpers, after which the chain state is reset.
///
/// Returns true if at least one merge or fold was counted in NumMerges.
LoadStoreMultipleOpti(MachineBasicBlock & MBB)1578f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1579f4a2713aSLionel Sambuc   unsigned NumMerges = 0;
1580f4a2713aSLionel Sambuc   unsigned NumMemOps = 0;
1581f4a2713aSLionel Sambuc   MemOpQueue MemOps;
  // State describing the chain currently being collected. CurrBase == 0 means
  // that no chain is open.
1582f4a2713aSLionel Sambuc   unsigned CurrBase = 0;
1583f4a2713aSLionel Sambuc   int CurrOpc = -1;
1584f4a2713aSLionel Sambuc   unsigned CurrSize = 0;
1585f4a2713aSLionel Sambuc   ARMCC::CondCodes CurrPred = ARMCC::AL;
1586f4a2713aSLionel Sambuc   unsigned CurrPredReg = 0;
  // Running index of the instructions stepped over so far; it is not bumped
  // for debug values (see the iterator-advance code below).
1587f4a2713aSLionel Sambuc   unsigned Position = 0;
1588f4a2713aSLionel Sambuc   SmallVector<MachineBasicBlock::iterator,4> Merges;
1589f4a2713aSLionel Sambuc 
1590f4a2713aSLionel Sambuc   RS->enterBasicBlock(&MBB);
1591f4a2713aSLionel Sambuc   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1592f4a2713aSLionel Sambuc   while (MBBI != E) {
    // When the pair op was rewritten, FixInvalidRegPairOp has repositioned
    // MBBI onto the replacement code, so restart the iteration from there.
1593f4a2713aSLionel Sambuc     if (FixInvalidRegPairOp(MBB, MBBI))
1594f4a2713aSLionel Sambuc       continue;
1595f4a2713aSLionel Sambuc 
    // Advance  - the current instruction has been dealt with; step past it.
    // TryMerge - flush the queued memory ops now.
    // Clobber  - the current load overwrites its own base register.
1596f4a2713aSLionel Sambuc     bool Advance  = false;
1597f4a2713aSLionel Sambuc     bool TryMerge = false;
1598f4a2713aSLionel Sambuc     bool Clobber  = false;
1599f4a2713aSLionel Sambuc 
1600f4a2713aSLionel Sambuc     bool isMemOp = isMemoryOp(MBBI);
1601f4a2713aSLionel Sambuc     if (isMemOp) {
1602f4a2713aSLionel Sambuc       int Opcode = MBBI->getOpcode();
1603f4a2713aSLionel Sambuc       unsigned Size = getLSMultipleTransferSize(MBBI);
1604f4a2713aSLionel Sambuc       const MachineOperand &MO = MBBI->getOperand(0);
1605f4a2713aSLionel Sambuc       unsigned Reg = MO.getReg();
      // A register that is defined here (a load destination) is never
      // recorded as killed.
1606f4a2713aSLionel Sambuc       bool isKill = MO.isDef() ? false : MO.isKill();
1607f4a2713aSLionel Sambuc       unsigned Base = MBBI->getOperand(1).getReg();
1608f4a2713aSLionel Sambuc       unsigned PredReg = 0;
1609f4a2713aSLionel Sambuc       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1610f4a2713aSLionel Sambuc       int Offset = getMemoryOpOffset(MBBI);
1611f4a2713aSLionel Sambuc       // Watch out for:
1612f4a2713aSLionel Sambuc       // r4 := ldr [r5]
1613f4a2713aSLionel Sambuc       // r5 := ldr [r5, #4]
1614f4a2713aSLionel Sambuc       // r6 := ldr [r5, #8]
1615f4a2713aSLionel Sambuc       //
1616f4a2713aSLionel Sambuc       // The second ldr has effectively broken the chain even though it
1617f4a2713aSLionel Sambuc       // looks like the later ldr(s) use the same base register. Try to
1618f4a2713aSLionel Sambuc       // merge the ldr's so far, including this one. But don't try to
1619f4a2713aSLionel Sambuc       // combine the following ldr(s).
1620f4a2713aSLionel Sambuc       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1621f4a2713aSLionel Sambuc 
1622f4a2713aSLionel Sambuc       // Watch out for:
1623f4a2713aSLionel Sambuc       // r4 := ldr [r0, #8]
1624f4a2713aSLionel Sambuc       // r4 := ldr [r0, #4]
1625f4a2713aSLionel Sambuc       //
1626f4a2713aSLionel Sambuc       // The optimization may reorder the second ldr in front of the first
1627f4a2713aSLionel Sambuc       // ldr, which violates write after write(WAW) dependence. The same as
1628f4a2713aSLionel Sambuc       // str. Try to merge inst(s) already in MemOps.
1629f4a2713aSLionel Sambuc       bool Overlap = false;
      // Note: this loop's E shadows the block-end iterator E declared above.
1630f4a2713aSLionel Sambuc       for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1631f4a2713aSLionel Sambuc         if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1632f4a2713aSLionel Sambuc           Overlap = true;
1633f4a2713aSLionel Sambuc           break;
1634f4a2713aSLionel Sambuc         }
1635f4a2713aSLionel Sambuc       }
1636f4a2713aSLionel Sambuc 
1637f4a2713aSLionel Sambuc       if (CurrBase == 0 && !Clobber) {
1638f4a2713aSLionel Sambuc         // Start of a new chain.
1639f4a2713aSLionel Sambuc         CurrBase = Base;
1640f4a2713aSLionel Sambuc         CurrOpc  = Opcode;
1641f4a2713aSLionel Sambuc         CurrSize = Size;
1642f4a2713aSLionel Sambuc         CurrPred = Pred;
1643f4a2713aSLionel Sambuc         CurrPredReg = PredReg;
1644f4a2713aSLionel Sambuc         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1645f4a2713aSLionel Sambuc         ++NumMemOps;
1646f4a2713aSLionel Sambuc         Advance = true;
1647f4a2713aSLionel Sambuc       } else if (!Overlap) {
        // If the transfer register overlaps a queued one, nothing is queued
        // and Advance stays false, so the code below flushes the queue and
        // this instruction is looked at again on the next iteration.
1648f4a2713aSLionel Sambuc         if (Clobber) {
1649f4a2713aSLionel Sambuc           TryMerge = true;
1650f4a2713aSLionel Sambuc           Advance = true;
1651f4a2713aSLionel Sambuc         }
1652f4a2713aSLionel Sambuc 
1653f4a2713aSLionel Sambuc         if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1654f4a2713aSLionel Sambuc           // No need to match PredReg.
1655f4a2713aSLionel Sambuc           // Continue adding to the queue.
1656f4a2713aSLionel Sambuc           if (Offset > MemOps.back().Offset) {
            // Larger than every queued offset: append at the end.
1657f4a2713aSLionel Sambuc             MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1658f4a2713aSLionel Sambuc                                              Position, MBBI));
1659f4a2713aSLionel Sambuc             ++NumMemOps;
1660f4a2713aSLionel Sambuc             Advance = true;
1661f4a2713aSLionel Sambuc           } else {
            // Otherwise insert in front of the first entry with a larger
            // offset so that the queue stays sorted.
1662f4a2713aSLionel Sambuc             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1663f4a2713aSLionel Sambuc                  I != E; ++I) {
1664f4a2713aSLionel Sambuc               if (Offset < I->Offset) {
1665f4a2713aSLionel Sambuc                 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1666f4a2713aSLionel Sambuc                                                  Position, MBBI));
1667f4a2713aSLionel Sambuc                 ++NumMemOps;
1668f4a2713aSLionel Sambuc                 Advance = true;
1669f4a2713aSLionel Sambuc                 break;
1670f4a2713aSLionel Sambuc               } else if (Offset == I->Offset) {
1671f4a2713aSLionel Sambuc                 // Collision! This can't be merged!
1672f4a2713aSLionel Sambuc                 break;
1673f4a2713aSLionel Sambuc               }
1674f4a2713aSLionel Sambuc             }
1675f4a2713aSLionel Sambuc           }
1676f4a2713aSLionel Sambuc         }
1677f4a2713aSLionel Sambuc       }
1678f4a2713aSLionel Sambuc     }
1679f4a2713aSLionel Sambuc 
    // Move the iterator. Debug values are stepped over without touching
    // Position and without breaking the chain. Anything else that was not
    // consumed (Advance == false) ends the current chain.
1680f4a2713aSLionel Sambuc     if (MBBI->isDebugValue()) {
1681f4a2713aSLionel Sambuc       ++MBBI;
1682f4a2713aSLionel Sambuc       if (MBBI == E)
1683f4a2713aSLionel Sambuc         // Reach the end of the block, try merging the memory instructions.
1684f4a2713aSLionel Sambuc         TryMerge = true;
1685f4a2713aSLionel Sambuc     } else if (Advance) {
1686f4a2713aSLionel Sambuc       ++Position;
1687f4a2713aSLionel Sambuc       ++MBBI;
1688f4a2713aSLionel Sambuc       if (MBBI == E)
1689f4a2713aSLionel Sambuc         // Reach the end of the block, try merging the memory instructions.
1690f4a2713aSLionel Sambuc         TryMerge = true;
1691*0a6a1f1dSLionel Sambuc     } else {
1692f4a2713aSLionel Sambuc       TryMerge = true;
1693*0a6a1f1dSLionel Sambuc     }
1694f4a2713aSLionel Sambuc 
1695f4a2713aSLionel Sambuc     if (TryMerge) {
1696f4a2713aSLionel Sambuc       if (NumMemOps > 1) {
1697f4a2713aSLionel Sambuc         // Try to find a free register to use as a new base in case it's needed.
1698f4a2713aSLionel Sambuc         // First advance to the instruction just before the start of the chain.
1699f4a2713aSLionel Sambuc         AdvanceRS(MBB, MemOps);
1700*0a6a1f1dSLionel Sambuc 
1701f4a2713aSLionel Sambuc         // Find a scratch register.
        // Thumb1 code draws the scratch register from tGPR instead of GPR.
1702*0a6a1f1dSLionel Sambuc         unsigned Scratch =
1703*0a6a1f1dSLionel Sambuc           RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1704*0a6a1f1dSLionel Sambuc 
1705f4a2713aSLionel Sambuc         // Process the load / store instructions.
1706*0a6a1f1dSLionel Sambuc         RS->forward(std::prev(MBBI));
1707f4a2713aSLionel Sambuc 
1708f4a2713aSLionel Sambuc         // Merge ops.
        // NOTE(review): the literal 0 is presumably the index in MemOps at
        // which merging starts - confirm against MergeLDR_STR.
1709f4a2713aSLionel Sambuc         Merges.clear();
1710f4a2713aSLionel Sambuc         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1711f4a2713aSLionel Sambuc                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1712f4a2713aSLionel Sambuc 
1713f4a2713aSLionel Sambuc         // Try folding preceding/trailing base inc/dec into the generated
1714f4a2713aSLionel Sambuc         // LDM/STM ops.
1715f4a2713aSLionel Sambuc         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1716f4a2713aSLionel Sambuc           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1717f4a2713aSLionel Sambuc             ++NumMerges;
        // Every instruction recorded in Merges counts as one merge, on top of
        // any base-update folds counted in the loop above.
1718f4a2713aSLionel Sambuc         NumMerges += Merges.size();
1719f4a2713aSLionel Sambuc 
1720f4a2713aSLionel Sambuc         // Try folding preceding/trailing base inc/dec into those load/store
1721f4a2713aSLionel Sambuc         // that were not merged to form LDM/STM ops.
1722f4a2713aSLionel Sambuc         for (unsigned i = 0; i != NumMemOps; ++i)
1723f4a2713aSLionel Sambuc           if (!MemOps[i].Merged)
1724f4a2713aSLionel Sambuc             if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1725f4a2713aSLionel Sambuc               ++NumMerges;
1726f4a2713aSLionel Sambuc 
1727f4a2713aSLionel Sambuc         // RS may be pointing to an instruction that's deleted.
1728*0a6a1f1dSLionel Sambuc         RS->skipTo(std::prev(MBBI));
1729f4a2713aSLionel Sambuc       } else if (NumMemOps == 1) {
1730f4a2713aSLionel Sambuc         // Try folding preceding/trailing base inc/dec into the single
1731f4a2713aSLionel Sambuc         // load/store.
1732f4a2713aSLionel Sambuc         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1733f4a2713aSLionel Sambuc           ++NumMerges;
1734*0a6a1f1dSLionel Sambuc           RS->forward(std::prev(MBBI));
1735f4a2713aSLionel Sambuc         }
1736f4a2713aSLionel Sambuc       }
1737f4a2713aSLionel Sambuc 
      // Close the chain: the next memory op seen will start a fresh one.
1738f4a2713aSLionel Sambuc       CurrBase = 0;
1739f4a2713aSLionel Sambuc       CurrOpc = -1;
1740f4a2713aSLionel Sambuc       CurrSize = 0;
1741f4a2713aSLionel Sambuc       CurrPred = ARMCC::AL;
1742f4a2713aSLionel Sambuc       CurrPredReg = 0;
1743f4a2713aSLionel Sambuc       if (NumMemOps) {
1744f4a2713aSLionel Sambuc         MemOps.clear();
1745f4a2713aSLionel Sambuc         NumMemOps = 0;
1746f4a2713aSLionel Sambuc       }
1747f4a2713aSLionel Sambuc 
1748f4a2713aSLionel Sambuc       // If iterator hasn't been advanced and this is not a memory op, skip it.
1749f4a2713aSLionel Sambuc       // It can't start a new chain anyway.
1750f4a2713aSLionel Sambuc       if (!Advance && !isMemOp && MBBI != E) {
1751f4a2713aSLionel Sambuc         ++Position;
1752f4a2713aSLionel Sambuc         ++MBBI;
1753f4a2713aSLionel Sambuc       }
1754f4a2713aSLionel Sambuc     }
1755f4a2713aSLionel Sambuc   }
1756f4a2713aSLionel Sambuc   return NumMerges > 0;
1757f4a2713aSLionel Sambuc }
1758f4a2713aSLionel Sambuc 
1759f4a2713aSLionel Sambuc /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1760f4a2713aSLionel Sambuc /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1761f4a2713aSLionel Sambuc /// directly restore the value of LR into pc.
1762f4a2713aSLionel Sambuc ///   ldmfd sp!, {..., lr}
1763f4a2713aSLionel Sambuc ///   bx lr
1764f4a2713aSLionel Sambuc /// or
1765f4a2713aSLionel Sambuc ///   ldmfd sp!, {..., lr}
1766f4a2713aSLionel Sambuc ///   mov pc, lr
1767f4a2713aSLionel Sambuc /// =>
1768f4a2713aSLionel Sambuc ///   ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1769f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1770*0a6a1f1dSLionel Sambuc   // Thumb1 LDM doesn't allow high registers.
1771*0a6a1f1dSLionel Sambuc   if (isThumb1) return false;
1772f4a2713aSLionel Sambuc   if (MBB.empty()) return false;
1773f4a2713aSLionel Sambuc 
1774f4a2713aSLionel Sambuc   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1775f4a2713aSLionel Sambuc   if (MBBI != MBB.begin() &&
1776f4a2713aSLionel Sambuc       (MBBI->getOpcode() == ARM::BX_RET ||
1777f4a2713aSLionel Sambuc        MBBI->getOpcode() == ARM::tBX_RET ||
1778f4a2713aSLionel Sambuc        MBBI->getOpcode() == ARM::MOVPCLR)) {
1779*0a6a1f1dSLionel Sambuc     MachineInstr *PrevMI = std::prev(MBBI);
1780f4a2713aSLionel Sambuc     unsigned Opcode = PrevMI->getOpcode();
1781f4a2713aSLionel Sambuc     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1782f4a2713aSLionel Sambuc         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1783f4a2713aSLionel Sambuc         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1784f4a2713aSLionel Sambuc       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1785f4a2713aSLionel Sambuc       if (MO.getReg() != ARM::LR)
1786f4a2713aSLionel Sambuc         return false;
1787f4a2713aSLionel Sambuc       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1788f4a2713aSLionel Sambuc       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1789f4a2713aSLionel Sambuc               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1790f4a2713aSLionel Sambuc       PrevMI->setDesc(TII->get(NewOpc));
1791f4a2713aSLionel Sambuc       MO.setReg(ARM::PC);
1792f4a2713aSLionel Sambuc       PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1793f4a2713aSLionel Sambuc       MBB.erase(MBBI);
1794f4a2713aSLionel Sambuc       return true;
1795f4a2713aSLionel Sambuc     }
1796f4a2713aSLionel Sambuc   }
1797f4a2713aSLionel Sambuc   return false;
1798f4a2713aSLionel Sambuc }
1799f4a2713aSLionel Sambuc 
runOnMachineFunction(MachineFunction & Fn)1800f4a2713aSLionel Sambuc bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1801f4a2713aSLionel Sambuc   const TargetMachine &TM = Fn.getTarget();
1802*0a6a1f1dSLionel Sambuc   TL = TM.getSubtargetImpl()->getTargetLowering();
1803f4a2713aSLionel Sambuc   AFI = Fn.getInfo<ARMFunctionInfo>();
1804*0a6a1f1dSLionel Sambuc   TII = TM.getSubtargetImpl()->getInstrInfo();
1805*0a6a1f1dSLionel Sambuc   TRI = TM.getSubtargetImpl()->getRegisterInfo();
1806f4a2713aSLionel Sambuc   STI = &TM.getSubtarget<ARMSubtarget>();
1807f4a2713aSLionel Sambuc   RS = new RegScavenger();
1808f4a2713aSLionel Sambuc   isThumb2 = AFI->isThumb2Function();
1809*0a6a1f1dSLionel Sambuc   isThumb1 = AFI->isThumbFunction() && !isThumb2;
1810f4a2713aSLionel Sambuc 
1811f4a2713aSLionel Sambuc   bool Modified = false;
1812f4a2713aSLionel Sambuc   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1813f4a2713aSLionel Sambuc        ++MFI) {
1814f4a2713aSLionel Sambuc     MachineBasicBlock &MBB = *MFI;
1815f4a2713aSLionel Sambuc     Modified |= LoadStoreMultipleOpti(MBB);
1816f4a2713aSLionel Sambuc     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1817f4a2713aSLionel Sambuc       Modified |= MergeReturnIntoLDM(MBB);
1818f4a2713aSLionel Sambuc   }
1819f4a2713aSLionel Sambuc 
1820f4a2713aSLionel Sambuc   delete RS;
1821f4a2713aSLionel Sambuc   return Modified;
1822f4a2713aSLionel Sambuc }
1823f4a2713aSLionel Sambuc 
1824f4a2713aSLionel Sambuc 
1825f4a2713aSLionel Sambuc /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1826f4a2713aSLionel Sambuc /// load / stores from consecutive locations close to make it more
1827f4a2713aSLionel Sambuc /// likely they will be combined later.
1828f4a2713aSLionel Sambuc 
1829f4a2713aSLionel Sambuc namespace {
1830f4a2713aSLionel Sambuc   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1831f4a2713aSLionel Sambuc     static char ID;
ARMPreAllocLoadStoreOpt__anon90483a300211::ARMPreAllocLoadStoreOpt1832f4a2713aSLionel Sambuc     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1833f4a2713aSLionel Sambuc 
1834f4a2713aSLionel Sambuc     const DataLayout *TD;
1835f4a2713aSLionel Sambuc     const TargetInstrInfo *TII;
1836f4a2713aSLionel Sambuc     const TargetRegisterInfo *TRI;
1837f4a2713aSLionel Sambuc     const ARMSubtarget *STI;
1838f4a2713aSLionel Sambuc     MachineRegisterInfo *MRI;
1839f4a2713aSLionel Sambuc     MachineFunction *MF;
1840f4a2713aSLionel Sambuc 
1841*0a6a1f1dSLionel Sambuc     bool runOnMachineFunction(MachineFunction &Fn) override;
1842f4a2713aSLionel Sambuc 
getPassName__anon90483a300211::ARMPreAllocLoadStoreOpt1843*0a6a1f1dSLionel Sambuc     const char *getPassName() const override {
1844f4a2713aSLionel Sambuc       return "ARM pre- register allocation load / store optimization pass";
1845f4a2713aSLionel Sambuc     }
1846f4a2713aSLionel Sambuc 
1847f4a2713aSLionel Sambuc   private:
1848f4a2713aSLionel Sambuc     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1849f4a2713aSLionel Sambuc                           unsigned &NewOpc, unsigned &EvenReg,
1850f4a2713aSLionel Sambuc                           unsigned &OddReg, unsigned &BaseReg,
1851f4a2713aSLionel Sambuc                           int &Offset,
1852f4a2713aSLionel Sambuc                           unsigned &PredReg, ARMCC::CondCodes &Pred,
1853f4a2713aSLionel Sambuc                           bool &isT2);
1854f4a2713aSLionel Sambuc     bool RescheduleOps(MachineBasicBlock *MBB,
1855f4a2713aSLionel Sambuc                        SmallVectorImpl<MachineInstr *> &Ops,
1856f4a2713aSLionel Sambuc                        unsigned Base, bool isLd,
1857f4a2713aSLionel Sambuc                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1858f4a2713aSLionel Sambuc     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1859f4a2713aSLionel Sambuc   };
1860f4a2713aSLionel Sambuc   char ARMPreAllocLoadStoreOpt::ID = 0;
1861f4a2713aSLionel Sambuc }
1862f4a2713aSLionel Sambuc 
runOnMachineFunction(MachineFunction & Fn)1863f4a2713aSLionel Sambuc bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1864*0a6a1f1dSLionel Sambuc   TD = Fn.getSubtarget().getDataLayout();
1865*0a6a1f1dSLionel Sambuc   TII = Fn.getSubtarget().getInstrInfo();
1866*0a6a1f1dSLionel Sambuc   TRI = Fn.getSubtarget().getRegisterInfo();
1867*0a6a1f1dSLionel Sambuc   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1868f4a2713aSLionel Sambuc   MRI = &Fn.getRegInfo();
1869f4a2713aSLionel Sambuc   MF  = &Fn;
1870f4a2713aSLionel Sambuc 
1871f4a2713aSLionel Sambuc   bool Modified = false;
1872f4a2713aSLionel Sambuc   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1873f4a2713aSLionel Sambuc        ++MFI)
1874f4a2713aSLionel Sambuc     Modified |= RescheduleLoadStoreInstrs(MFI);
1875f4a2713aSLionel Sambuc 
1876f4a2713aSLionel Sambuc   return Modified;
1877f4a2713aSLionel Sambuc }
1878f4a2713aSLionel Sambuc 
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSetImpl<MachineInstr * > & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI)1879f4a2713aSLionel Sambuc static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1880f4a2713aSLionel Sambuc                                       MachineBasicBlock::iterator I,
1881f4a2713aSLionel Sambuc                                       MachineBasicBlock::iterator E,
1882*0a6a1f1dSLionel Sambuc                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
1883f4a2713aSLionel Sambuc                                       SmallSet<unsigned, 4> &MemRegs,
1884f4a2713aSLionel Sambuc                                       const TargetRegisterInfo *TRI) {
1885f4a2713aSLionel Sambuc   // Are there stores / loads / calls between them?
1886f4a2713aSLionel Sambuc   // FIXME: This is overly conservative. We should make use of alias information
1887f4a2713aSLionel Sambuc   // some day.
1888f4a2713aSLionel Sambuc   SmallSet<unsigned, 4> AddedRegPressure;
1889f4a2713aSLionel Sambuc   while (++I != E) {
1890f4a2713aSLionel Sambuc     if (I->isDebugValue() || MemOps.count(&*I))
1891f4a2713aSLionel Sambuc       continue;
1892f4a2713aSLionel Sambuc     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1893f4a2713aSLionel Sambuc       return false;
1894f4a2713aSLionel Sambuc     if (isLd && I->mayStore())
1895f4a2713aSLionel Sambuc       return false;
1896f4a2713aSLionel Sambuc     if (!isLd) {
1897f4a2713aSLionel Sambuc       if (I->mayLoad())
1898f4a2713aSLionel Sambuc         return false;
1899f4a2713aSLionel Sambuc       // It's not safe to move the first 'str' down.
1900f4a2713aSLionel Sambuc       // str r1, [r0]
1901f4a2713aSLionel Sambuc       // strh r5, [r0]
1902f4a2713aSLionel Sambuc       // str r4, [r0, #+4]
1903f4a2713aSLionel Sambuc       if (I->mayStore())
1904f4a2713aSLionel Sambuc         return false;
1905f4a2713aSLionel Sambuc     }
1906f4a2713aSLionel Sambuc     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1907f4a2713aSLionel Sambuc       MachineOperand &MO = I->getOperand(j);
1908f4a2713aSLionel Sambuc       if (!MO.isReg())
1909f4a2713aSLionel Sambuc         continue;
1910f4a2713aSLionel Sambuc       unsigned Reg = MO.getReg();
1911f4a2713aSLionel Sambuc       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1912f4a2713aSLionel Sambuc         return false;
1913f4a2713aSLionel Sambuc       if (Reg != Base && !MemRegs.count(Reg))
1914f4a2713aSLionel Sambuc         AddedRegPressure.insert(Reg);
1915f4a2713aSLionel Sambuc     }
1916f4a2713aSLionel Sambuc   }
1917f4a2713aSLionel Sambuc 
1918f4a2713aSLionel Sambuc   // Estimate register pressure increase due to the transformation.
1919f4a2713aSLionel Sambuc   if (MemRegs.size() <= 4)
1920f4a2713aSLionel Sambuc     // Ok if we are moving small number of instructions.
1921f4a2713aSLionel Sambuc     return true;
1922f4a2713aSLionel Sambuc   return AddedRegPressure.size() <= MemRegs.size() * 2;
1923f4a2713aSLionel Sambuc }
1924f4a2713aSLionel Sambuc 
1925f4a2713aSLionel Sambuc 
1926f4a2713aSLionel Sambuc /// Copy Op0 and Op1 operands into a new array assigned to MI.
concatenateMemOperands(MachineInstr * MI,MachineInstr * Op0,MachineInstr * Op1)1927f4a2713aSLionel Sambuc static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1928f4a2713aSLionel Sambuc                                    MachineInstr *Op1) {
1929f4a2713aSLionel Sambuc   assert(MI->memoperands_empty() && "expected a new machineinstr");
1930f4a2713aSLionel Sambuc   size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1931f4a2713aSLionel Sambuc     + (Op1->memoperands_end() - Op1->memoperands_begin());
1932f4a2713aSLionel Sambuc 
1933f4a2713aSLionel Sambuc   MachineFunction *MF = MI->getParent()->getParent();
1934f4a2713aSLionel Sambuc   MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1935f4a2713aSLionel Sambuc   MachineSDNode::mmo_iterator MemEnd =
1936f4a2713aSLionel Sambuc     std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1937f4a2713aSLionel Sambuc   MemEnd =
1938f4a2713aSLionel Sambuc     std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1939f4a2713aSLionel Sambuc   MI->setMemRefs(MemBegin, MemEnd);
1940f4a2713aSLionel Sambuc }
1941f4a2713aSLionel Sambuc 
1942f4a2713aSLionel Sambuc bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & EvenReg,unsigned & OddReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)1943f4a2713aSLionel Sambuc ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1944f4a2713aSLionel Sambuc                                           DebugLoc &dl,
1945f4a2713aSLionel Sambuc                                           unsigned &NewOpc, unsigned &EvenReg,
1946f4a2713aSLionel Sambuc                                           unsigned &OddReg, unsigned &BaseReg,
1947f4a2713aSLionel Sambuc                                           int &Offset, unsigned &PredReg,
1948f4a2713aSLionel Sambuc                                           ARMCC::CondCodes &Pred,
1949f4a2713aSLionel Sambuc                                           bool &isT2) {
1950f4a2713aSLionel Sambuc   // Make sure we're allowed to generate LDRD/STRD.
1951f4a2713aSLionel Sambuc   if (!STI->hasV5TEOps())
1952f4a2713aSLionel Sambuc     return false;
1953f4a2713aSLionel Sambuc 
1954f4a2713aSLionel Sambuc   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1955f4a2713aSLionel Sambuc   unsigned Scale = 1;
1956f4a2713aSLionel Sambuc   unsigned Opcode = Op0->getOpcode();
1957*0a6a1f1dSLionel Sambuc   if (Opcode == ARM::LDRi12) {
1958f4a2713aSLionel Sambuc     NewOpc = ARM::LDRD;
1959*0a6a1f1dSLionel Sambuc   } else if (Opcode == ARM::STRi12) {
1960f4a2713aSLionel Sambuc     NewOpc = ARM::STRD;
1961*0a6a1f1dSLionel Sambuc   } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1962f4a2713aSLionel Sambuc     NewOpc = ARM::t2LDRDi8;
1963f4a2713aSLionel Sambuc     Scale = 4;
1964f4a2713aSLionel Sambuc     isT2 = true;
1965f4a2713aSLionel Sambuc   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1966f4a2713aSLionel Sambuc     NewOpc = ARM::t2STRDi8;
1967f4a2713aSLionel Sambuc     Scale = 4;
1968f4a2713aSLionel Sambuc     isT2 = true;
1969*0a6a1f1dSLionel Sambuc   } else {
1970f4a2713aSLionel Sambuc     return false;
1971*0a6a1f1dSLionel Sambuc   }
1972f4a2713aSLionel Sambuc 
1973f4a2713aSLionel Sambuc   // Make sure the base address satisfies i64 ld / st alignment requirement.
1974f4a2713aSLionel Sambuc   // At the moment, we ignore the memoryoperand's value.
1975f4a2713aSLionel Sambuc   // If we want to use AliasAnalysis, we should check it accordingly.
1976f4a2713aSLionel Sambuc   if (!Op0->hasOneMemOperand() ||
1977f4a2713aSLionel Sambuc       (*Op0->memoperands_begin())->isVolatile())
1978f4a2713aSLionel Sambuc     return false;
1979f4a2713aSLionel Sambuc 
1980f4a2713aSLionel Sambuc   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1981f4a2713aSLionel Sambuc   const Function *Func = MF->getFunction();
1982f4a2713aSLionel Sambuc   unsigned ReqAlign = STI->hasV6Ops()
1983f4a2713aSLionel Sambuc     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1984f4a2713aSLionel Sambuc     : 8;  // Pre-v6 need 8-byte align
1985f4a2713aSLionel Sambuc   if (Align < ReqAlign)
1986f4a2713aSLionel Sambuc     return false;
1987f4a2713aSLionel Sambuc 
1988f4a2713aSLionel Sambuc   // Then make sure the immediate offset fits.
1989f4a2713aSLionel Sambuc   int OffImm = getMemoryOpOffset(Op0);
1990f4a2713aSLionel Sambuc   if (isT2) {
1991f4a2713aSLionel Sambuc     int Limit = (1 << 8) * Scale;
1992f4a2713aSLionel Sambuc     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1993f4a2713aSLionel Sambuc       return false;
1994f4a2713aSLionel Sambuc     Offset = OffImm;
1995f4a2713aSLionel Sambuc   } else {
1996f4a2713aSLionel Sambuc     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1997f4a2713aSLionel Sambuc     if (OffImm < 0) {
1998f4a2713aSLionel Sambuc       AddSub = ARM_AM::sub;
1999f4a2713aSLionel Sambuc       OffImm = - OffImm;
2000f4a2713aSLionel Sambuc     }
2001f4a2713aSLionel Sambuc     int Limit = (1 << 8) * Scale;
2002f4a2713aSLionel Sambuc     if (OffImm >= Limit || (OffImm & (Scale-1)))
2003f4a2713aSLionel Sambuc       return false;
2004f4a2713aSLionel Sambuc     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2005f4a2713aSLionel Sambuc   }
2006f4a2713aSLionel Sambuc   EvenReg = Op0->getOperand(0).getReg();
2007f4a2713aSLionel Sambuc   OddReg  = Op1->getOperand(0).getReg();
2008f4a2713aSLionel Sambuc   if (EvenReg == OddReg)
2009f4a2713aSLionel Sambuc     return false;
2010f4a2713aSLionel Sambuc   BaseReg = Op0->getOperand(1).getReg();
2011f4a2713aSLionel Sambuc   Pred = getInstrPredicate(Op0, PredReg);
2012f4a2713aSLionel Sambuc   dl = Op0->getDebugLoc();
2013f4a2713aSLionel Sambuc   return true;
2014f4a2713aSLionel Sambuc }
2015f4a2713aSLionel Sambuc 
RescheduleOps(MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)2016f4a2713aSLionel Sambuc bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2017f4a2713aSLionel Sambuc                                  SmallVectorImpl<MachineInstr *> &Ops,
2018f4a2713aSLionel Sambuc                                  unsigned Base, bool isLd,
2019f4a2713aSLionel Sambuc                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2020f4a2713aSLionel Sambuc   bool RetVal = false;
2021f4a2713aSLionel Sambuc 
2022f4a2713aSLionel Sambuc   // Sort by offset (in reverse order).
2023*0a6a1f1dSLionel Sambuc   std::sort(Ops.begin(), Ops.end(),
2024*0a6a1f1dSLionel Sambuc             [](const MachineInstr *LHS, const MachineInstr *RHS) {
2025*0a6a1f1dSLionel Sambuc     int LOffset = getMemoryOpOffset(LHS);
2026*0a6a1f1dSLionel Sambuc     int ROffset = getMemoryOpOffset(RHS);
2027*0a6a1f1dSLionel Sambuc     assert(LHS == RHS || LOffset != ROffset);
2028*0a6a1f1dSLionel Sambuc     return LOffset > ROffset;
2029*0a6a1f1dSLionel Sambuc   });
2030f4a2713aSLionel Sambuc 
2031f4a2713aSLionel Sambuc   // The loads / stores of the same base are in order. Scan them from first to
2032f4a2713aSLionel Sambuc   // last and check for the following:
2033f4a2713aSLionel Sambuc   // 1. Any def of base.
2034f4a2713aSLionel Sambuc   // 2. Any gaps.
2035f4a2713aSLionel Sambuc   while (Ops.size() > 1) {
2036f4a2713aSLionel Sambuc     unsigned FirstLoc = ~0U;
2037f4a2713aSLionel Sambuc     unsigned LastLoc = 0;
2038*0a6a1f1dSLionel Sambuc     MachineInstr *FirstOp = nullptr;
2039*0a6a1f1dSLionel Sambuc     MachineInstr *LastOp = nullptr;
2040f4a2713aSLionel Sambuc     int LastOffset = 0;
2041f4a2713aSLionel Sambuc     unsigned LastOpcode = 0;
2042f4a2713aSLionel Sambuc     unsigned LastBytes = 0;
2043f4a2713aSLionel Sambuc     unsigned NumMove = 0;
2044f4a2713aSLionel Sambuc     for (int i = Ops.size() - 1; i >= 0; --i) {
2045f4a2713aSLionel Sambuc       MachineInstr *Op = Ops[i];
2046f4a2713aSLionel Sambuc       unsigned Loc = MI2LocMap[Op];
2047f4a2713aSLionel Sambuc       if (Loc <= FirstLoc) {
2048f4a2713aSLionel Sambuc         FirstLoc = Loc;
2049f4a2713aSLionel Sambuc         FirstOp = Op;
2050f4a2713aSLionel Sambuc       }
2051f4a2713aSLionel Sambuc       if (Loc >= LastLoc) {
2052f4a2713aSLionel Sambuc         LastLoc = Loc;
2053f4a2713aSLionel Sambuc         LastOp = Op;
2054f4a2713aSLionel Sambuc       }
2055f4a2713aSLionel Sambuc 
2056f4a2713aSLionel Sambuc       unsigned LSMOpcode
2057f4a2713aSLionel Sambuc         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2058f4a2713aSLionel Sambuc       if (LastOpcode && LSMOpcode != LastOpcode)
2059f4a2713aSLionel Sambuc         break;
2060f4a2713aSLionel Sambuc 
2061f4a2713aSLionel Sambuc       int Offset = getMemoryOpOffset(Op);
2062f4a2713aSLionel Sambuc       unsigned Bytes = getLSMultipleTransferSize(Op);
2063f4a2713aSLionel Sambuc       if (LastBytes) {
2064f4a2713aSLionel Sambuc         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2065f4a2713aSLionel Sambuc           break;
2066f4a2713aSLionel Sambuc       }
2067f4a2713aSLionel Sambuc       LastOffset = Offset;
2068f4a2713aSLionel Sambuc       LastBytes = Bytes;
2069f4a2713aSLionel Sambuc       LastOpcode = LSMOpcode;
2070f4a2713aSLionel Sambuc       if (++NumMove == 8) // FIXME: Tune this limit.
2071f4a2713aSLionel Sambuc         break;
2072f4a2713aSLionel Sambuc     }
2073f4a2713aSLionel Sambuc 
2074f4a2713aSLionel Sambuc     if (NumMove <= 1)
2075f4a2713aSLionel Sambuc       Ops.pop_back();
2076f4a2713aSLionel Sambuc     else {
2077f4a2713aSLionel Sambuc       SmallPtrSet<MachineInstr*, 4> MemOps;
2078f4a2713aSLionel Sambuc       SmallSet<unsigned, 4> MemRegs;
2079f4a2713aSLionel Sambuc       for (int i = NumMove-1; i >= 0; --i) {
2080f4a2713aSLionel Sambuc         MemOps.insert(Ops[i]);
2081f4a2713aSLionel Sambuc         MemRegs.insert(Ops[i]->getOperand(0).getReg());
2082f4a2713aSLionel Sambuc       }
2083f4a2713aSLionel Sambuc 
2084f4a2713aSLionel Sambuc       // Be conservative, if the instructions are too far apart, don't
2085f4a2713aSLionel Sambuc       // move them. We want to limit the increase of register pressure.
2086f4a2713aSLionel Sambuc       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2087f4a2713aSLionel Sambuc       if (DoMove)
2088f4a2713aSLionel Sambuc         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2089f4a2713aSLionel Sambuc                                            MemOps, MemRegs, TRI);
2090f4a2713aSLionel Sambuc       if (!DoMove) {
2091f4a2713aSLionel Sambuc         for (unsigned i = 0; i != NumMove; ++i)
2092f4a2713aSLionel Sambuc           Ops.pop_back();
2093f4a2713aSLionel Sambuc       } else {
2094f4a2713aSLionel Sambuc         // This is the new location for the loads / stores.
2095f4a2713aSLionel Sambuc         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2096f4a2713aSLionel Sambuc         while (InsertPos != MBB->end()
2097f4a2713aSLionel Sambuc                && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2098f4a2713aSLionel Sambuc           ++InsertPos;
2099f4a2713aSLionel Sambuc 
2100f4a2713aSLionel Sambuc         // If we are moving a pair of loads / stores, see if it makes sense
2101f4a2713aSLionel Sambuc         // to try to allocate a pair of registers that can form register pairs.
2102f4a2713aSLionel Sambuc         MachineInstr *Op0 = Ops.back();
2103f4a2713aSLionel Sambuc         MachineInstr *Op1 = Ops[Ops.size()-2];
2104f4a2713aSLionel Sambuc         unsigned EvenReg = 0, OddReg = 0;
2105f4a2713aSLionel Sambuc         unsigned BaseReg = 0, PredReg = 0;
2106f4a2713aSLionel Sambuc         ARMCC::CondCodes Pred = ARMCC::AL;
2107f4a2713aSLionel Sambuc         bool isT2 = false;
2108f4a2713aSLionel Sambuc         unsigned NewOpc = 0;
2109f4a2713aSLionel Sambuc         int Offset = 0;
2110f4a2713aSLionel Sambuc         DebugLoc dl;
2111f4a2713aSLionel Sambuc         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2112f4a2713aSLionel Sambuc                                              EvenReg, OddReg, BaseReg,
2113f4a2713aSLionel Sambuc                                              Offset, PredReg, Pred, isT2)) {
2114f4a2713aSLionel Sambuc           Ops.pop_back();
2115f4a2713aSLionel Sambuc           Ops.pop_back();
2116f4a2713aSLionel Sambuc 
2117f4a2713aSLionel Sambuc           const MCInstrDesc &MCID = TII->get(NewOpc);
2118f4a2713aSLionel Sambuc           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2119f4a2713aSLionel Sambuc           MRI->constrainRegClass(EvenReg, TRC);
2120f4a2713aSLionel Sambuc           MRI->constrainRegClass(OddReg, TRC);
2121f4a2713aSLionel Sambuc 
2122f4a2713aSLionel Sambuc           // Form the pair instruction.
2123f4a2713aSLionel Sambuc           if (isLd) {
2124f4a2713aSLionel Sambuc             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2125f4a2713aSLionel Sambuc               .addReg(EvenReg, RegState::Define)
2126f4a2713aSLionel Sambuc               .addReg(OddReg, RegState::Define)
2127f4a2713aSLionel Sambuc               .addReg(BaseReg);
2128f4a2713aSLionel Sambuc             // FIXME: We're converting from LDRi12 to an insn that still
2129f4a2713aSLionel Sambuc             // uses addrmode2, so we need an explicit offset reg. It should
2130f4a2713aSLionel Sambuc             // always by reg0 since we're transforming LDRi12s.
2131f4a2713aSLionel Sambuc             if (!isT2)
2132f4a2713aSLionel Sambuc               MIB.addReg(0);
2133f4a2713aSLionel Sambuc             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2134f4a2713aSLionel Sambuc             concatenateMemOperands(MIB, Op0, Op1);
2135f4a2713aSLionel Sambuc             DEBUG(dbgs() << "Formed " << *MIB << "\n");
2136f4a2713aSLionel Sambuc             ++NumLDRDFormed;
2137f4a2713aSLionel Sambuc           } else {
2138f4a2713aSLionel Sambuc             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2139f4a2713aSLionel Sambuc               .addReg(EvenReg)
2140f4a2713aSLionel Sambuc               .addReg(OddReg)
2141f4a2713aSLionel Sambuc               .addReg(BaseReg);
2142f4a2713aSLionel Sambuc             // FIXME: We're converting from LDRi12 to an insn that still
2143f4a2713aSLionel Sambuc             // uses addrmode2, so we need an explicit offset reg. It should
2144f4a2713aSLionel Sambuc             // always by reg0 since we're transforming STRi12s.
2145f4a2713aSLionel Sambuc             if (!isT2)
2146f4a2713aSLionel Sambuc               MIB.addReg(0);
2147f4a2713aSLionel Sambuc             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2148f4a2713aSLionel Sambuc             concatenateMemOperands(MIB, Op0, Op1);
2149f4a2713aSLionel Sambuc             DEBUG(dbgs() << "Formed " << *MIB << "\n");
2150f4a2713aSLionel Sambuc             ++NumSTRDFormed;
2151f4a2713aSLionel Sambuc           }
2152f4a2713aSLionel Sambuc           MBB->erase(Op0);
2153f4a2713aSLionel Sambuc           MBB->erase(Op1);
2154f4a2713aSLionel Sambuc 
2155f4a2713aSLionel Sambuc           // Add register allocation hints to form register pairs.
2156f4a2713aSLionel Sambuc           MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
2157f4a2713aSLionel Sambuc           MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
2158f4a2713aSLionel Sambuc         } else {
2159f4a2713aSLionel Sambuc           for (unsigned i = 0; i != NumMove; ++i) {
2160f4a2713aSLionel Sambuc             MachineInstr *Op = Ops.back();
2161f4a2713aSLionel Sambuc             Ops.pop_back();
2162f4a2713aSLionel Sambuc             MBB->splice(InsertPos, MBB, Op);
2163f4a2713aSLionel Sambuc           }
2164f4a2713aSLionel Sambuc         }
2165f4a2713aSLionel Sambuc 
2166f4a2713aSLionel Sambuc         NumLdStMoved += NumMove;
2167f4a2713aSLionel Sambuc         RetVal = true;
2168f4a2713aSLionel Sambuc       }
2169f4a2713aSLionel Sambuc     }
2170f4a2713aSLionel Sambuc   }
2171f4a2713aSLionel Sambuc 
2172f4a2713aSLionel Sambuc   return RetVal;
2173f4a2713aSLionel Sambuc }
2174f4a2713aSLionel Sambuc 
2175f4a2713aSLionel Sambuc bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)2176f4a2713aSLionel Sambuc ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2177f4a2713aSLionel Sambuc   bool RetVal = false;
2178f4a2713aSLionel Sambuc 
2179f4a2713aSLionel Sambuc   DenseMap<MachineInstr*, unsigned> MI2LocMap;
2180f4a2713aSLionel Sambuc   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2181f4a2713aSLionel Sambuc   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2182f4a2713aSLionel Sambuc   SmallVector<unsigned, 4> LdBases;
2183f4a2713aSLionel Sambuc   SmallVector<unsigned, 4> StBases;
2184f4a2713aSLionel Sambuc 
2185f4a2713aSLionel Sambuc   unsigned Loc = 0;
2186f4a2713aSLionel Sambuc   MachineBasicBlock::iterator MBBI = MBB->begin();
2187f4a2713aSLionel Sambuc   MachineBasicBlock::iterator E = MBB->end();
2188f4a2713aSLionel Sambuc   while (MBBI != E) {
2189f4a2713aSLionel Sambuc     for (; MBBI != E; ++MBBI) {
2190f4a2713aSLionel Sambuc       MachineInstr *MI = MBBI;
2191f4a2713aSLionel Sambuc       if (MI->isCall() || MI->isTerminator()) {
2192f4a2713aSLionel Sambuc         // Stop at barriers.
2193f4a2713aSLionel Sambuc         ++MBBI;
2194f4a2713aSLionel Sambuc         break;
2195f4a2713aSLionel Sambuc       }
2196f4a2713aSLionel Sambuc 
2197f4a2713aSLionel Sambuc       if (!MI->isDebugValue())
2198f4a2713aSLionel Sambuc         MI2LocMap[MI] = ++Loc;
2199f4a2713aSLionel Sambuc 
2200f4a2713aSLionel Sambuc       if (!isMemoryOp(MI))
2201f4a2713aSLionel Sambuc         continue;
2202f4a2713aSLionel Sambuc       unsigned PredReg = 0;
2203f4a2713aSLionel Sambuc       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2204f4a2713aSLionel Sambuc         continue;
2205f4a2713aSLionel Sambuc 
2206f4a2713aSLionel Sambuc       int Opc = MI->getOpcode();
2207f4a2713aSLionel Sambuc       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2208f4a2713aSLionel Sambuc       unsigned Base = MI->getOperand(1).getReg();
2209f4a2713aSLionel Sambuc       int Offset = getMemoryOpOffset(MI);
2210f4a2713aSLionel Sambuc 
2211f4a2713aSLionel Sambuc       bool StopHere = false;
2212f4a2713aSLionel Sambuc       if (isLd) {
2213f4a2713aSLionel Sambuc         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2214f4a2713aSLionel Sambuc           Base2LdsMap.find(Base);
2215f4a2713aSLionel Sambuc         if (BI != Base2LdsMap.end()) {
2216f4a2713aSLionel Sambuc           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2217f4a2713aSLionel Sambuc             if (Offset == getMemoryOpOffset(BI->second[i])) {
2218f4a2713aSLionel Sambuc               StopHere = true;
2219f4a2713aSLionel Sambuc               break;
2220f4a2713aSLionel Sambuc             }
2221f4a2713aSLionel Sambuc           }
2222f4a2713aSLionel Sambuc           if (!StopHere)
2223f4a2713aSLionel Sambuc             BI->second.push_back(MI);
2224f4a2713aSLionel Sambuc         } else {
2225f4a2713aSLionel Sambuc           Base2LdsMap[Base].push_back(MI);
2226f4a2713aSLionel Sambuc           LdBases.push_back(Base);
2227f4a2713aSLionel Sambuc         }
2228f4a2713aSLionel Sambuc       } else {
2229f4a2713aSLionel Sambuc         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2230f4a2713aSLionel Sambuc           Base2StsMap.find(Base);
2231f4a2713aSLionel Sambuc         if (BI != Base2StsMap.end()) {
2232f4a2713aSLionel Sambuc           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2233f4a2713aSLionel Sambuc             if (Offset == getMemoryOpOffset(BI->second[i])) {
2234f4a2713aSLionel Sambuc               StopHere = true;
2235f4a2713aSLionel Sambuc               break;
2236f4a2713aSLionel Sambuc             }
2237f4a2713aSLionel Sambuc           }
2238f4a2713aSLionel Sambuc           if (!StopHere)
2239f4a2713aSLionel Sambuc             BI->second.push_back(MI);
2240f4a2713aSLionel Sambuc         } else {
2241f4a2713aSLionel Sambuc           Base2StsMap[Base].push_back(MI);
2242f4a2713aSLionel Sambuc           StBases.push_back(Base);
2243f4a2713aSLionel Sambuc         }
2244f4a2713aSLionel Sambuc       }
2245f4a2713aSLionel Sambuc 
2246f4a2713aSLionel Sambuc       if (StopHere) {
2247f4a2713aSLionel Sambuc         // Found a duplicate (a base+offset combination that's seen earlier).
2248f4a2713aSLionel Sambuc         // Backtrack.
2249f4a2713aSLionel Sambuc         --Loc;
2250f4a2713aSLionel Sambuc         break;
2251f4a2713aSLionel Sambuc       }
2252f4a2713aSLionel Sambuc     }
2253f4a2713aSLionel Sambuc 
2254f4a2713aSLionel Sambuc     // Re-schedule loads.
2255f4a2713aSLionel Sambuc     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2256f4a2713aSLionel Sambuc       unsigned Base = LdBases[i];
2257f4a2713aSLionel Sambuc       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2258f4a2713aSLionel Sambuc       if (Lds.size() > 1)
2259f4a2713aSLionel Sambuc         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2260f4a2713aSLionel Sambuc     }
2261f4a2713aSLionel Sambuc 
2262f4a2713aSLionel Sambuc     // Re-schedule stores.
2263f4a2713aSLionel Sambuc     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2264f4a2713aSLionel Sambuc       unsigned Base = StBases[i];
2265f4a2713aSLionel Sambuc       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2266f4a2713aSLionel Sambuc       if (Sts.size() > 1)
2267f4a2713aSLionel Sambuc         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2268f4a2713aSLionel Sambuc     }
2269f4a2713aSLionel Sambuc 
2270f4a2713aSLionel Sambuc     if (MBBI != E) {
2271f4a2713aSLionel Sambuc       Base2LdsMap.clear();
2272f4a2713aSLionel Sambuc       Base2StsMap.clear();
2273f4a2713aSLionel Sambuc       LdBases.clear();
2274f4a2713aSLionel Sambuc       StBases.clear();
2275f4a2713aSLionel Sambuc     }
2276f4a2713aSLionel Sambuc   }
2277f4a2713aSLionel Sambuc 
2278f4a2713aSLionel Sambuc   return RetVal;
2279f4a2713aSLionel Sambuc }
2280f4a2713aSLionel Sambuc 
2281f4a2713aSLionel Sambuc 
2282f4a2713aSLionel Sambuc /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
2283f4a2713aSLionel Sambuc /// optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)2284f4a2713aSLionel Sambuc FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2285f4a2713aSLionel Sambuc   if (PreAlloc)
2286f4a2713aSLionel Sambuc     return new ARMPreAllocLoadStoreOpt();
2287f4a2713aSLionel Sambuc   return new ARMLoadStoreOpt();
2288f4a2713aSLionel Sambuc }
2289