1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29 
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
32 
33 #define DEBUG_TYPE FIXUPLEA_NAME
34 
35 STATISTIC(NumLEAs, "Number of LEA instructions created");
36 
37 namespace {
38 class FixupLEAPass : public MachineFunctionPass {
39   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40 
41   /// Given a machine register, look for the instruction
42   /// which writes it in the current basic block. If found,
43   /// try to replace it with an equivalent LEA instruction.
44   /// If replacement succeeds, then also process the newly created
45   /// instruction.
46   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47                     MachineBasicBlock &MBB);
48 
49   /// Given a memory access or LEA instruction
50   /// whose address mode uses a base and/or index register, look for
51   /// an opportunity to replace the instruction which sets the base or index
52   /// register with an equivalent LEA instruction.
53   void processInstruction(MachineBasicBlock::iterator &I,
54                           MachineBasicBlock &MBB);
55 
56   /// Given a LEA instruction which is unprofitable
57   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59                                     MachineBasicBlock &MBB);
60 
61   /// Given a LEA instruction which is unprofitable
62   /// on SNB+ try to replace it with other instructions.
63   /// According to Intel's Optimization Reference Manual:
64   /// " For LEA instructions with three source operands and some specific
65   ///   situations, instruction latency has increased to 3 cycles, and must
66   ///   dispatch via port 1:
67   /// - LEA that has all three source operands: base, index, and offset
68   /// - LEA that uses base and index registers where the base is EBP, RBP,
69   ///   or R13
70   /// - LEA that uses RIP relative addressing mode
71   /// - LEA that uses 16-bit addressing mode "
72   /// This function currently handles the first 2 cases only.
73   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74                                  MachineBasicBlock &MBB, bool OptIncDec);
75 
76   /// Look for LEAs that are really two address LEAs that we might be able to
77   /// turn into regular ADD instructions.
78   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79                      MachineBasicBlock &MBB, bool OptIncDec,
80                      bool UseLEAForSP) const;
81 
82   /// Look for and transform the sequence
83   ///     lea (reg1, reg2), reg3
84   ///     sub reg3, reg4
85   /// to
86   ///     sub reg1, reg4
87   ///     sub reg2, reg4
88   /// It can also optimize the sequence lea/add similarly.
89   bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90 
91   /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92   /// the dest register of LEA instruction I.
93   MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94                                             MachineBasicBlock &MBB) const;
95 
96   /// Check instructions between LeaI and AluI (exclusively).
97   /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98   /// Set AluDestRef to true if the dest register of AluI is used or defined.
99   /// *KilledBase is set to the killed base register usage.
100   /// *KilledIndex is set to the killed index register usage.
101   void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102                      MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103                      bool &AluDestRef, MachineOperand **KilledBase,
104                      MachineOperand **KilledIndex) const;
105 
106   /// Determine if an instruction references a machine register
107   /// and, if so, whether it reads or writes the register.
108   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109 
110   /// Step backwards through a basic block, looking
111   /// for an instruction which writes a register within
112   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114                                               MachineBasicBlock::iterator &I,
115                                               MachineBasicBlock &MBB);
116 
117   /// if an instruction can be converted to an
118   /// equivalent LEA, insert the new instruction into the basic block
119   /// and return a pointer to it. Otherwise, return zero.
120   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121                                    MachineBasicBlock::iterator &MBBI) const;
122 
123 public:
124   static char ID;
125 
getPassName() const126   StringRef getPassName() const override { return FIXUPLEA_DESC; }
127 
FixupLEAPass()128   FixupLEAPass() : MachineFunctionPass(ID) { }
129 
130   /// Loop over all of the basic blocks,
131   /// replacing instructions by equivalent LEA instructions
132   /// if needed and when possible.
133   bool runOnMachineFunction(MachineFunction &MF) override;
134 
135   // This pass runs after regalloc and doesn't support VReg operands.
getRequiredProperties() const136   MachineFunctionProperties getRequiredProperties() const override {
137     return MachineFunctionProperties().set(
138         MachineFunctionProperties::Property::NoVRegs);
139   }
140 
getAnalysisUsage(AnalysisUsage & AU) const141   void getAnalysisUsage(AnalysisUsage &AU) const override {
142     AU.addRequired<ProfileSummaryInfoWrapperPass>();
143     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144     MachineFunctionPass::getAnalysisUsage(AU);
145   }
146 
147 private:
148   TargetSchedModel TSM;
149   const X86InstrInfo *TII = nullptr;
150   const X86RegisterInfo *TRI = nullptr;
151 };
152 }
153 
154 char FixupLEAPass::ID = 0;
155 
INITIALIZE_PASS(FixupLEAPass,FIXUPLEA_NAME,FIXUPLEA_DESC,false,false)156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157 
158 MachineInstr *
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160                                  MachineBasicBlock::iterator &MBBI) const {
161   MachineInstr &MI = *MBBI;
162   switch (MI.getOpcode()) {
163   case X86::MOV32rr:
164   case X86::MOV64rr: {
165     const MachineOperand &Src = MI.getOperand(1);
166     const MachineOperand &Dest = MI.getOperand(0);
167     MachineInstr *NewMI =
168         BuildMI(MBB, MBBI, MI.getDebugLoc(),
169                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170                                                         : X86::LEA64r))
171             .add(Dest)
172             .add(Src)
173             .addImm(1)
174             .addReg(0)
175             .addImm(0)
176             .addReg(0);
177     return NewMI;
178   }
179   }
180 
181   if (!MI.isConvertibleTo3Addr())
182     return nullptr;
183 
184   switch (MI.getOpcode()) {
185   default:
186     // Only convert instructions that we've verified are safe.
187     return nullptr;
188   case X86::ADD64ri32:
189   case X86::ADD64ri32_DB:
190   case X86::ADD32ri:
191   case X86::ADD32ri_DB:
192     if (!MI.getOperand(2).isImm()) {
193       // convertToThreeAddress will call getImm()
194       // which requires isImm() to be true
195       return nullptr;
196     }
197     break;
198   case X86::SHL64ri:
199   case X86::SHL32ri:
200   case X86::INC64r:
201   case X86::INC32r:
202   case X86::DEC64r:
203   case X86::DEC32r:
204   case X86::ADD64rr:
205   case X86::ADD64rr_DB:
206   case X86::ADD32rr:
207   case X86::ADD32rr_DB:
208     // These instructions are all fine to convert.
209     break;
210   }
211   return TII->convertToThreeAddress(MI, nullptr, nullptr);
212 }
213 
createX86FixupLEAs()214 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
215 
isLEA(unsigned Opcode)216 static bool isLEA(unsigned Opcode) {
217   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
218          Opcode == X86::LEA64_32r;
219 }
220 
runOnMachineFunction(MachineFunction & MF)221 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
222   if (skipFunction(MF.getFunction()))
223     return false;
224 
225   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
226   bool IsSlowLEA = ST.slowLEA();
227   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
228   bool LEAUsesAG = ST.leaUsesAG();
229 
230   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
231   bool UseLEAForSP = ST.useLeaForSP();
232 
233   TSM.init(&ST);
234   TII = ST.getInstrInfo();
235   TRI = ST.getRegisterInfo();
236   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
237   auto *MBFI = (PSI && PSI->hasProfileSummary())
238                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
239                    : nullptr;
240 
241   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
242   for (MachineBasicBlock &MBB : MF) {
243     // First pass. Try to remove or optimize existing LEAs.
244     bool OptIncDecPerBB =
245         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
246     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
247       if (!isLEA(I->getOpcode()))
248         continue;
249 
250       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
251         continue;
252 
253       if (IsSlowLEA)
254         processInstructionForSlowLEA(I, MBB);
255       else if (IsSlow3OpsLEA)
256         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
257     }
258 
259     // Second pass for creating LEAs. This may reverse some of the
260     // transformations above.
261     if (LEAUsesAG) {
262       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
263         processInstruction(I, MBB);
264     }
265   }
266 
267   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
268 
269   return true;
270 }
271 
272 FixupLEAPass::RegUsageState
usesRegister(MachineOperand & p,MachineBasicBlock::iterator I)273 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
274   RegUsageState RegUsage = RU_NotUsed;
275   MachineInstr &MI = *I;
276 
277   for (const MachineOperand &MO : MI.operands()) {
278     if (MO.isReg() && MO.getReg() == p.getReg()) {
279       if (MO.isDef())
280         return RU_Write;
281       RegUsage = RU_Read;
282     }
283   }
284   return RegUsage;
285 }
286 
287 /// getPreviousInstr - Given a reference to an instruction in a basic
288 /// block, return a reference to the previous instruction in the block,
289 /// wrapping around to the last instruction of the block if the block
290 /// branches to itself.
getPreviousInstr(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)291 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
292                                     MachineBasicBlock &MBB) {
293   if (I == MBB.begin()) {
294     if (MBB.isPredecessor(&MBB)) {
295       I = --MBB.end();
296       return true;
297     } else
298       return false;
299   }
300   --I;
301   return true;
302 }
303 
304 MachineBasicBlock::iterator
searchBackwards(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)305 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
306                               MachineBasicBlock &MBB) {
307   int InstrDistance = 1;
308   MachineBasicBlock::iterator CurInst;
309   static const int INSTR_DISTANCE_THRESHOLD = 5;
310 
311   CurInst = I;
312   bool Found;
313   Found = getPreviousInstr(CurInst, MBB);
314   while (Found && I != CurInst) {
315     if (CurInst->isCall() || CurInst->isInlineAsm())
316       break;
317     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
318       break; // too far back to make a difference
319     if (usesRegister(p, CurInst) == RU_Write) {
320       return CurInst;
321     }
322     InstrDistance += TSM.computeInstrLatency(&*CurInst);
323     Found = getPreviousInstr(CurInst, MBB);
324   }
325   return MachineBasicBlock::iterator();
326 }
327 
isInefficientLEAReg(unsigned Reg)328 static inline bool isInefficientLEAReg(unsigned Reg) {
329   return Reg == X86::EBP || Reg == X86::RBP ||
330          Reg == X86::R13D || Reg == X86::R13;
331 }
332 
333 /// Returns true if this LEA uses base and index registers, and the base
334 /// register is known to be inefficient for the subtarget.
335 // TODO: use a variant scheduling class to model the latency profile
336 // of LEA instructions, and implement this logic as a scheduling predicate.
hasInefficientLEABaseReg(const MachineOperand & Base,const MachineOperand & Index)337 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
338                                             const MachineOperand &Index) {
339   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
340          Index.getReg() != X86::NoRegister;
341 }
342 
hasLEAOffset(const MachineOperand & Offset)343 static inline bool hasLEAOffset(const MachineOperand &Offset) {
344   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||
345          Offset.isBlockAddress();
346 }
347 
getADDrrFromLEA(unsigned LEAOpcode)348 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
349   switch (LEAOpcode) {
350   default:
351     llvm_unreachable("Unexpected LEA instruction");
352   case X86::LEA32r:
353   case X86::LEA64_32r:
354     return X86::ADD32rr;
355   case X86::LEA64r:
356     return X86::ADD64rr;
357   }
358 }
359 
getSUBrrFromLEA(unsigned LEAOpcode)360 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
361   switch (LEAOpcode) {
362   default:
363     llvm_unreachable("Unexpected LEA instruction");
364   case X86::LEA32r:
365   case X86::LEA64_32r:
366     return X86::SUB32rr;
367   case X86::LEA64r:
368     return X86::SUB64rr;
369   }
370 }
371 
getADDriFromLEA(unsigned LEAOpcode,const MachineOperand & Offset)372 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
373                                        const MachineOperand &Offset) {
374   switch (LEAOpcode) {
375   default:
376     llvm_unreachable("Unexpected LEA instruction");
377   case X86::LEA32r:
378   case X86::LEA64_32r:
379     return X86::ADD32ri;
380   case X86::LEA64r:
381     return X86::ADD64ri32;
382   }
383 }
384 
getINCDECFromLEA(unsigned LEAOpcode,bool IsINC)385 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
386   switch (LEAOpcode) {
387   default:
388     llvm_unreachable("Unexpected LEA instruction");
389   case X86::LEA32r:
390   case X86::LEA64_32r:
391     return IsINC ? X86::INC32r : X86::DEC32r;
392   case X86::LEA64r:
393     return IsINC ? X86::INC64r : X86::DEC64r;
394   }
395 }
396 
397 MachineBasicBlock::iterator
searchALUInst(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB) const398 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
399                             MachineBasicBlock &MBB) const {
400   const int InstrDistanceThreshold = 5;
401   int InstrDistance = 1;
402   MachineBasicBlock::iterator CurInst = std::next(I);
403 
404   unsigned LEAOpcode = I->getOpcode();
405   unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
406   unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
407   Register DestReg = I->getOperand(0).getReg();
408 
409   while (CurInst != MBB.end()) {
410     if (CurInst->isCall() || CurInst->isInlineAsm())
411       break;
412     if (InstrDistance > InstrDistanceThreshold)
413       break;
414 
415     // Check if the lea dest register is used in an add/sub instruction only.
416     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
417       MachineOperand &Opnd = CurInst->getOperand(I);
418       if (Opnd.isReg()) {
419         if (Opnd.getReg() == DestReg) {
420           if (Opnd.isDef() || !Opnd.isKill())
421             return MachineBasicBlock::iterator();
422 
423           unsigned AluOpcode = CurInst->getOpcode();
424           if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
425             return MachineBasicBlock::iterator();
426 
427           MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
428           MachineOperand AluDest = CurInst->getOperand(0);
429           if (Opnd2.getReg() != AluDest.getReg())
430             return MachineBasicBlock::iterator();
431 
432           // X - (Y + Z) may generate different flags than (X - Y) - Z when
433           // there is overflow. So we can't change the alu instruction if the
434           // flags register is live.
435           if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
436             return MachineBasicBlock::iterator();
437 
438           return CurInst;
439         }
440         if (TRI->regsOverlap(DestReg, Opnd.getReg()))
441           return MachineBasicBlock::iterator();
442       }
443     }
444 
445     InstrDistance++;
446     ++CurInst;
447   }
448   return MachineBasicBlock::iterator();
449 }
450 
checkRegUsage(MachineBasicBlock::iterator & LeaI,MachineBasicBlock::iterator & AluI,bool & BaseIndexDef,bool & AluDestRef,MachineOperand ** KilledBase,MachineOperand ** KilledIndex) const451 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
452                                  MachineBasicBlock::iterator &AluI,
453                                  bool &BaseIndexDef, bool &AluDestRef,
454                                  MachineOperand **KilledBase,
455                                  MachineOperand **KilledIndex) const {
456   BaseIndexDef = AluDestRef = false;
457   *KilledBase = *KilledIndex = nullptr;
458   Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
459   Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
460   Register AluDestReg = AluI->getOperand(0).getReg();
461 
462   for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {
463     for (MachineOperand &Opnd : CurInst.operands()) {
464       if (!Opnd.isReg())
465         continue;
466       Register Reg = Opnd.getReg();
467       if (TRI->regsOverlap(Reg, AluDestReg))
468         AluDestRef = true;
469       if (TRI->regsOverlap(Reg, BaseReg)) {
470         if (Opnd.isDef())
471           BaseIndexDef = true;
472         else if (Opnd.isKill())
473           *KilledBase = &Opnd;
474       }
475       if (TRI->regsOverlap(Reg, IndexReg)) {
476         if (Opnd.isDef())
477           BaseIndexDef = true;
478         else if (Opnd.isKill())
479           *KilledIndex = &Opnd;
480       }
481     }
482   }
483 }
484 
optLEAALU(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB) const485 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
486                              MachineBasicBlock &MBB) const {
487   // Look for an add/sub instruction which uses the result of lea.
488   MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
489   if (AluI == MachineBasicBlock::iterator())
490     return false;
491 
492   // Check if there are any related register usage between lea and alu.
493   bool BaseIndexDef, AluDestRef;
494   MachineOperand *KilledBase, *KilledIndex;
495   checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
496 
497   MachineBasicBlock::iterator InsertPos = AluI;
498   if (BaseIndexDef) {
499     if (AluDestRef)
500       return false;
501     InsertPos = I;
502     KilledBase = KilledIndex = nullptr;
503   }
504 
505   // Check if there are same registers.
506   Register AluDestReg = AluI->getOperand(0).getReg();
507   Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
508   Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
509   if (I->getOpcode() == X86::LEA64_32r) {
510     BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
511     IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
512   }
513   if (AluDestReg == IndexReg) {
514     if (BaseReg == IndexReg)
515       return false;
516     std::swap(BaseReg, IndexReg);
517     std::swap(KilledBase, KilledIndex);
518   }
519   if (BaseReg == IndexReg)
520     KilledBase = nullptr;
521 
522   // Now it's safe to change instructions.
523   MachineInstr *NewMI1, *NewMI2;
524   unsigned NewOpcode = AluI->getOpcode();
525   NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
526                    AluDestReg)
527                .addReg(AluDestReg, RegState::Kill)
528                .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
529   NewMI1->addRegisterDead(X86::EFLAGS, TRI);
530   NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
531                    AluDestReg)
532                .addReg(AluDestReg, RegState::Kill)
533                .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
534   NewMI2->addRegisterDead(X86::EFLAGS, TRI);
535 
536   // Clear the old Kill flags.
537   if (KilledBase)
538     KilledBase->setIsKill(false);
539   if (KilledIndex)
540     KilledIndex->setIsKill(false);
541 
542   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
543   MBB.erase(I);
544   MBB.erase(AluI);
545   I = NewMI1;
546   return true;
547 }
548 
optTwoAddrLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec,bool UseLEAForSP) const549 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
550                                  MachineBasicBlock &MBB, bool OptIncDec,
551                                  bool UseLEAForSP) const {
552   MachineInstr &MI = *I;
553 
554   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
555   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
556   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
557   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
558   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
559 
560   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
561       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
562           MachineBasicBlock::LQR_Dead)
563     return false;
564 
565   Register DestReg = MI.getOperand(0).getReg();
566   Register BaseReg = Base.getReg();
567   Register IndexReg = Index.getReg();
568 
569   // Don't change stack adjustment LEAs.
570   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
571     return false;
572 
573   // LEA64_32 has 64-bit operands but 32-bit result.
574   if (MI.getOpcode() == X86::LEA64_32r) {
575     if (BaseReg != 0)
576       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
577     if (IndexReg != 0)
578       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
579   }
580 
581   MachineInstr *NewMI = nullptr;
582 
583   // Case 1.
584   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
585   // which can be turned into add %reg2, %reg1
586   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
587       (DestReg == BaseReg || DestReg == IndexReg)) {
588     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
589     if (DestReg != BaseReg)
590       std::swap(BaseReg, IndexReg);
591 
592     if (MI.getOpcode() == X86::LEA64_32r) {
593       // TODO: Do we need the super register implicit use?
594       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
595         .addReg(BaseReg).addReg(IndexReg)
596         .addReg(Base.getReg(), RegState::Implicit)
597         .addReg(Index.getReg(), RegState::Implicit);
598     } else {
599       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
600         .addReg(BaseReg).addReg(IndexReg);
601     }
602   } else if (DestReg == BaseReg && IndexReg == 0) {
603     // Case 2.
604     // This is an LEA with only a base register and a displacement,
605     // We can use ADDri or INC/DEC.
606 
607     // Does this LEA have one these forms:
608     // lea  %reg, 1(%reg)
609     // lea  %reg, -1(%reg)
610     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
611       bool IsINC = Disp.getImm() == 1;
612       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
613 
614       if (MI.getOpcode() == X86::LEA64_32r) {
615         // TODO: Do we need the super register implicit use?
616         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
617           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
618       } else {
619         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
620           .addReg(BaseReg);
621       }
622     } else {
623       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
624       if (MI.getOpcode() == X86::LEA64_32r) {
625         // TODO: Do we need the super register implicit use?
626         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
627           .addReg(BaseReg).addImm(Disp.getImm())
628           .addReg(Base.getReg(), RegState::Implicit);
629       } else {
630         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
631           .addReg(BaseReg).addImm(Disp.getImm());
632       }
633     }
634   } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
635     // Case 3.
636     // Look for and transform the sequence
637     //     lea (reg1, reg2), reg3
638     //     sub reg3, reg4
639     return optLEAALU(I, MBB);
640   } else
641     return false;
642 
643   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
644   MBB.erase(I);
645   I = NewMI;
646   return true;
647 }
648 
processInstruction(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)649 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
650                                       MachineBasicBlock &MBB) {
651   // Process a load, store, or LEA instruction.
652   MachineInstr &MI = *I;
653   const MCInstrDesc &Desc = MI.getDesc();
654   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
655   if (AddrOffset >= 0) {
656     AddrOffset += X86II::getOperandBias(Desc);
657     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
658     if (p.isReg() && p.getReg() != X86::ESP) {
659       seekLEAFixup(p, I, MBB);
660     }
661     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
662     if (q.isReg() && q.getReg() != X86::ESP) {
663       seekLEAFixup(q, I, MBB);
664     }
665   }
666 }
667 
seekLEAFixup(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)668 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
669                                 MachineBasicBlock::iterator &I,
670                                 MachineBasicBlock &MBB) {
671   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
672   if (MBI != MachineBasicBlock::iterator()) {
673     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
674     if (NewMI) {
675       ++NumLEAs;
676       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
677       // now to replace with an equivalent LEA...
678       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
679       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
680       MBB.erase(MBI);
681       MachineBasicBlock::iterator J =
682           static_cast<MachineBasicBlock::iterator>(NewMI);
683       processInstruction(J, MBB);
684     }
685   }
686 }
687 
processInstructionForSlowLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)688 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
689                                                 MachineBasicBlock &MBB) {
690   MachineInstr &MI = *I;
691   const unsigned Opcode = MI.getOpcode();
692 
693   const MachineOperand &Dst =     MI.getOperand(0);
694   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
695   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
696   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
697   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
698   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
699 
700   if (Segment.getReg() != 0 || !Offset.isImm() ||
701       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
702           MachineBasicBlock::LQR_Dead)
703     return;
704   const Register DstR = Dst.getReg();
705   const Register SrcR1 = Base.getReg();
706   const Register SrcR2 = Index.getReg();
707   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
708     return;
709   if (Scale.getImm() > 1)
710     return;
711   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
712   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
713   MachineInstr *NewMI = nullptr;
714   // Make ADD instruction for two registers writing to LEA's destination
715   if (SrcR1 != 0 && SrcR2 != 0) {
716     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
717     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
718     NewMI =
719         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
720     LLVM_DEBUG(NewMI->dump(););
721   }
722   // Make ADD instruction for immediate
723   if (Offset.getImm() != 0) {
724     const MCInstrDesc &ADDri =
725         TII->get(getADDriFromLEA(Opcode, Offset));
726     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
727     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
728                 .add(SrcR)
729                 .addImm(Offset.getImm());
730     LLVM_DEBUG(NewMI->dump(););
731   }
732   if (NewMI) {
733     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
734     MBB.erase(I);
735     I = NewMI;
736   }
737 }
738 
processInstrForSlow3OpLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec)739 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
740                                              MachineBasicBlock &MBB,
741                                              bool OptIncDec) {
742   MachineInstr &MI = *I;
743   const unsigned LEAOpcode = MI.getOpcode();
744 
745   const MachineOperand &Dest =    MI.getOperand(0);
746   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
747   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
748   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
749   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
750   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
751 
752   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
753       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
754           MachineBasicBlock::LQR_Dead ||
755       Segment.getReg() != X86::NoRegister)
756     return;
757 
758   Register DestReg = Dest.getReg();
759   Register BaseReg = Base.getReg();
760   Register IndexReg = Index.getReg();
761 
762   if (MI.getOpcode() == X86::LEA64_32r) {
763     if (BaseReg != 0)
764       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
765     if (IndexReg != 0)
766       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
767   }
768 
769   bool IsScale1 = Scale.getImm() == 1;
770   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
771   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
772 
773   // Skip these cases since it takes more than 2 instructions
774   // to replace the LEA instruction.
775   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
776     return;
777 
778   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
779   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
780 
781   MachineInstr *NewMI = nullptr;
782   bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
783   // First try and remove the base while sticking with LEA iff base == index and
784   // scale == 1. We can handle:
785   //    1. lea D(%base,%index,1)   -> lea D(,%index,2)
786   //    2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
787   // Only do this if the LEA would otherwise be split into 2-instruction
788   // (either it has a an Offset or neither base nor index are dst)
789   if (IsScale1 && BaseReg == IndexReg &&
790       (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
791     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
792                 .add(Dest)
793                 .addReg(0)
794                 .addImm(2)
795                 .add(Index)
796                 .add(Offset)
797                 .add(Segment);
798     LLVM_DEBUG(NewMI->dump(););
799 
800     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
801     MBB.erase(I);
802     I = NewMI;
803     return;
804   } else if (IsScale1 && BaseOrIndexIsDst) {
805     // Try to replace LEA with one or two (for the 3-op LEA case)
806     // add instructions:
807     // 1.lea (%base,%index,1), %base => add %index,%base
808     // 2.lea (%base,%index,1), %index => add %base,%index
809 
810     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
811     if (DestReg != BaseReg)
812       std::swap(BaseReg, IndexReg);
813 
814     if (MI.getOpcode() == X86::LEA64_32r) {
815       // TODO: Do we need the super register implicit use?
816       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
817                   .addReg(BaseReg)
818                   .addReg(IndexReg)
819                   .addReg(Base.getReg(), RegState::Implicit)
820                   .addReg(Index.getReg(), RegState::Implicit);
821     } else {
822       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
823                   .addReg(BaseReg)
824                   .addReg(IndexReg);
825     }
826   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
827     // If the base is inefficient try switching the index and base operands,
828     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
829     // lea offset(%base,%index,scale),%dst =>
830     // lea (%base,%index,scale); add offset,%dst
831     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
832                 .add(Dest)
833                 .add(IsInefficientBase ? Index : Base)
834                 .add(Scale)
835                 .add(IsInefficientBase ? Base : Index)
836                 .addImm(0)
837                 .add(Segment);
838     LLVM_DEBUG(NewMI->dump(););
839   }
840 
841   // If either replacement succeeded above, add the offset if needed, then
842   // replace the instruction.
843   if (NewMI) {
844     // Create ADD instruction for the Offset in case of 3-Ops LEA.
845     if (hasLEAOffset(Offset)) {
846       if (OptIncDec && Offset.isImm() &&
847           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
848         unsigned NewOpc =
849             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
850         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
851                     .addReg(DestReg);
852         LLVM_DEBUG(NewMI->dump(););
853       } else {
854         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
855         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
856                     .addReg(DestReg)
857                     .add(Offset);
858         LLVM_DEBUG(NewMI->dump(););
859       }
860     }
861 
862     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
863     MBB.erase(I);
864     I = NewMI;
865     return;
866   }
867 
868   // Handle the rest of the cases with inefficient base register:
869   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
870   assert(IsInefficientBase && "efficient base should be handled already!");
871 
872   // FIXME: Handle LEA64_32r.
873   if (LEAOpcode == X86::LEA64_32r)
874     return;
875 
876   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
877   if (IsScale1 && !hasLEAOffset(Offset)) {
878     bool BIK = Base.isKill() && BaseReg != IndexReg;
879     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
880     LLVM_DEBUG(MI.getPrevNode()->dump(););
881 
882     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
883     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
884                 .addReg(DestReg)
885                 .add(Index);
886     LLVM_DEBUG(NewMI->dump(););
887 
888     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
889     MBB.erase(I);
890     I = NewMI;
891     return;
892   }
893 
894   // lea offset(%base,%index,scale), %dst =>
895   // lea offset( ,%index,scale), %dst; add %base,%dst
896   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
897               .add(Dest)
898               .addReg(0)
899               .add(Scale)
900               .add(Index)
901               .add(Offset)
902               .add(Segment);
903   LLVM_DEBUG(NewMI->dump(););
904 
905   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
906   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
907               .addReg(DestReg)
908               .add(Base);
909   LLVM_DEBUG(NewMI->dump(););
910 
911   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
912   MBB.erase(I);
913   I = NewMI;
914 }
915