1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29 
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
32 
33 #define DEBUG_TYPE FIXUPLEA_NAME
34 
35 STATISTIC(NumLEAs, "Number of LEA instructions created");
36 
37 namespace {
38 class FixupLEAPass : public MachineFunctionPass {
39   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40 
41   /// Given a machine register, look for the instruction
42   /// which writes it in the current basic block. If found,
43   /// try to replace it with an equivalent LEA instruction.
44   /// If replacement succeeds, then also process the newly created
45   /// instruction.
46   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47                     MachineBasicBlock &MBB);
48 
49   /// Given a memory access or LEA instruction
50   /// whose address mode uses a base and/or index register, look for
51   /// an opportunity to replace the instruction which sets the base or index
52   /// register with an equivalent LEA instruction.
53   void processInstruction(MachineBasicBlock::iterator &I,
54                           MachineBasicBlock &MBB);
55 
56   /// Given a LEA instruction which is unprofitable
57   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59                                     MachineBasicBlock &MBB);
60 
61   /// Given a LEA instruction which is unprofitable
62   /// on SNB+ try to replace it with other instructions.
63   /// According to Intel's Optimization Reference Manual:
64   /// " For LEA instructions with three source operands and some specific
65   ///   situations, instruction latency has increased to 3 cycles, and must
66   ///   dispatch via port 1:
67   /// - LEA that has all three source operands: base, index, and offset
68   /// - LEA that uses base and index registers where the base is EBP, RBP,
69   ///   or R13
70   /// - LEA that uses RIP relative addressing mode
71   /// - LEA that uses 16-bit addressing mode "
72   /// This function currently handles the first 2 cases only.
73   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74                                  MachineBasicBlock &MBB, bool OptIncDec);
75 
76   /// Look for LEAs that are really two address LEAs that we might be able to
77   /// turn into regular ADD instructions.
78   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79                      MachineBasicBlock &MBB, bool OptIncDec,
80                      bool UseLEAForSP) const;
81 
82   /// Look for and transform the sequence
83   ///     lea (reg1, reg2), reg3
84   ///     sub reg3, reg4
85   /// to
86   ///     sub reg1, reg4
87   ///     sub reg2, reg4
88   /// It can also optimize the sequence lea/add similarly.
89   bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90 
91   /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92   /// the dest register of LEA instruction I.
93   MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94                                             MachineBasicBlock &MBB) const;
95 
96   /// Check instructions between LeaI and AluI (exclusively).
97   /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98   /// Set AluDestRef to true if the dest register of AluI is used or defined.
99   /// *KilledBase is set to the killed base register usage.
100   /// *KilledIndex is set to the killed index register usage.
101   void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102                      MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103                      bool &AluDestRef, MachineOperand **KilledBase,
104                      MachineOperand **KilledIndex) const;
105 
106   /// Determine if an instruction references a machine register
107   /// and, if so, whether it reads or writes the register.
108   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109 
110   /// Step backwards through a basic block, looking
111   /// for an instruction which writes a register within
112   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114                                               MachineBasicBlock::iterator &I,
115                                               MachineBasicBlock &MBB);
116 
117   /// if an instruction can be converted to an
118   /// equivalent LEA, insert the new instruction into the basic block
119   /// and return a pointer to it. Otherwise, return zero.
120   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121                                    MachineBasicBlock::iterator &MBBI) const;
122 
123 public:
124   static char ID;
125 
126   StringRef getPassName() const override { return FIXUPLEA_DESC; }
127 
128   FixupLEAPass() : MachineFunctionPass(ID) { }
129 
130   /// Loop over all of the basic blocks,
131   /// replacing instructions by equivalent LEA instructions
132   /// if needed and when possible.
133   bool runOnMachineFunction(MachineFunction &MF) override;
134 
135   // This pass runs after regalloc and doesn't support VReg operands.
136   MachineFunctionProperties getRequiredProperties() const override {
137     return MachineFunctionProperties().set(
138         MachineFunctionProperties::Property::NoVRegs);
139   }
140 
141   void getAnalysisUsage(AnalysisUsage &AU) const override {
142     AU.addRequired<ProfileSummaryInfoWrapperPass>();
143     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144     MachineFunctionPass::getAnalysisUsage(AU);
145   }
146 
147 private:
148   TargetSchedModel TSM;
149   const X86InstrInfo *TII = nullptr;
150   const X86RegisterInfo *TRI = nullptr;
151 };
152 }
153 
154 char FixupLEAPass::ID = 0;
155 
156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157 
158 MachineInstr *
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160                                  MachineBasicBlock::iterator &MBBI) const {
161   MachineInstr &MI = *MBBI;
162   switch (MI.getOpcode()) {
163   case X86::MOV32rr:
164   case X86::MOV64rr: {
165     const MachineOperand &Src = MI.getOperand(1);
166     const MachineOperand &Dest = MI.getOperand(0);
167     MachineInstr *NewMI =
168         BuildMI(MBB, MBBI, MI.getDebugLoc(),
169                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170                                                         : X86::LEA64r))
171             .add(Dest)
172             .add(Src)
173             .addImm(1)
174             .addReg(0)
175             .addImm(0)
176             .addReg(0);
177     return NewMI;
178   }
179   }
180 
181   if (!MI.isConvertibleTo3Addr())
182     return nullptr;
183 
184   switch (MI.getOpcode()) {
185   default:
186     // Only convert instructions that we've verified are safe.
187     return nullptr;
188   case X86::ADD64ri32:
189   case X86::ADD64ri8:
190   case X86::ADD64ri32_DB:
191   case X86::ADD64ri8_DB:
192   case X86::ADD32ri:
193   case X86::ADD32ri8:
194   case X86::ADD32ri_DB:
195   case X86::ADD32ri8_DB:
196     if (!MI.getOperand(2).isImm()) {
197       // convertToThreeAddress will call getImm()
198       // which requires isImm() to be true
199       return nullptr;
200     }
201     break;
202   case X86::SHL64ri:
203   case X86::SHL32ri:
204   case X86::INC64r:
205   case X86::INC32r:
206   case X86::DEC64r:
207   case X86::DEC32r:
208   case X86::ADD64rr:
209   case X86::ADD64rr_DB:
210   case X86::ADD32rr:
211   case X86::ADD32rr_DB:
212     // These instructions are all fine to convert.
213     break;
214   }
215   return TII->convertToThreeAddress(MI, nullptr, nullptr);
216 }
217 
218 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
219 
220 static bool isLEA(unsigned Opcode) {
221   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
222          Opcode == X86::LEA64_32r;
223 }
224 
225 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
226   if (skipFunction(MF.getFunction()))
227     return false;
228 
229   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
230   bool IsSlowLEA = ST.slowLEA();
231   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
232   bool LEAUsesAG = ST.leaUsesAG();
233 
234   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
235   bool UseLEAForSP = ST.useLeaForSP();
236 
237   TSM.init(&ST);
238   TII = ST.getInstrInfo();
239   TRI = ST.getRegisterInfo();
240   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
241   auto *MBFI = (PSI && PSI->hasProfileSummary())
242                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
243                    : nullptr;
244 
245   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
246   for (MachineBasicBlock &MBB : MF) {
247     // First pass. Try to remove or optimize existing LEAs.
248     bool OptIncDecPerBB =
249         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
250     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
251       if (!isLEA(I->getOpcode()))
252         continue;
253 
254       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
255         continue;
256 
257       if (IsSlowLEA)
258         processInstructionForSlowLEA(I, MBB);
259       else if (IsSlow3OpsLEA)
260         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
261     }
262 
263     // Second pass for creating LEAs. This may reverse some of the
264     // transformations above.
265     if (LEAUsesAG) {
266       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
267         processInstruction(I, MBB);
268     }
269   }
270 
271   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
272 
273   return true;
274 }
275 
276 FixupLEAPass::RegUsageState
277 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
278   RegUsageState RegUsage = RU_NotUsed;
279   MachineInstr &MI = *I;
280 
281   for (const MachineOperand &MO : MI.operands()) {
282     if (MO.isReg() && MO.getReg() == p.getReg()) {
283       if (MO.isDef())
284         return RU_Write;
285       RegUsage = RU_Read;
286     }
287   }
288   return RegUsage;
289 }
290 
291 /// getPreviousInstr - Given a reference to an instruction in a basic
292 /// block, return a reference to the previous instruction in the block,
293 /// wrapping around to the last instruction of the block if the block
294 /// branches to itself.
295 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
296                                     MachineBasicBlock &MBB) {
297   if (I == MBB.begin()) {
298     if (MBB.isPredecessor(&MBB)) {
299       I = --MBB.end();
300       return true;
301     } else
302       return false;
303   }
304   --I;
305   return true;
306 }
307 
308 MachineBasicBlock::iterator
309 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
310                               MachineBasicBlock &MBB) {
311   int InstrDistance = 1;
312   MachineBasicBlock::iterator CurInst;
313   static const int INSTR_DISTANCE_THRESHOLD = 5;
314 
315   CurInst = I;
316   bool Found;
317   Found = getPreviousInstr(CurInst, MBB);
318   while (Found && I != CurInst) {
319     if (CurInst->isCall() || CurInst->isInlineAsm())
320       break;
321     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
322       break; // too far back to make a difference
323     if (usesRegister(p, CurInst) == RU_Write) {
324       return CurInst;
325     }
326     InstrDistance += TSM.computeInstrLatency(&*CurInst);
327     Found = getPreviousInstr(CurInst, MBB);
328   }
329   return MachineBasicBlock::iterator();
330 }
331 
332 static inline bool isInefficientLEAReg(unsigned Reg) {
333   return Reg == X86::EBP || Reg == X86::RBP ||
334          Reg == X86::R13D || Reg == X86::R13;
335 }
336 
337 /// Returns true if this LEA uses base an index registers, and the base register
338 /// is known to be inefficient for the subtarget.
339 // TODO: use a variant scheduling class to model the latency profile
340 // of LEA instructions, and implement this logic as a scheduling predicate.
341 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
342                                             const MachineOperand &Index) {
343   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
344          Index.getReg() != X86::NoRegister;
345 }
346 
347 static inline bool hasLEAOffset(const MachineOperand &Offset) {
348   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
349 }
350 
351 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
352   switch (LEAOpcode) {
353   default:
354     llvm_unreachable("Unexpected LEA instruction");
355   case X86::LEA32r:
356   case X86::LEA64_32r:
357     return X86::ADD32rr;
358   case X86::LEA64r:
359     return X86::ADD64rr;
360   }
361 }
362 
363 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
364   switch (LEAOpcode) {
365   default:
366     llvm_unreachable("Unexpected LEA instruction");
367   case X86::LEA32r:
368   case X86::LEA64_32r:
369     return X86::SUB32rr;
370   case X86::LEA64r:
371     return X86::SUB64rr;
372   }
373 }
374 
375 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
376                                        const MachineOperand &Offset) {
377   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
378   switch (LEAOpcode) {
379   default:
380     llvm_unreachable("Unexpected LEA instruction");
381   case X86::LEA32r:
382   case X86::LEA64_32r:
383     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
384   case X86::LEA64r:
385     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
386   }
387 }
388 
389 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
390   switch (LEAOpcode) {
391   default:
392     llvm_unreachable("Unexpected LEA instruction");
393   case X86::LEA32r:
394   case X86::LEA64_32r:
395     return IsINC ? X86::INC32r : X86::DEC32r;
396   case X86::LEA64r:
397     return IsINC ? X86::INC64r : X86::DEC64r;
398   }
399 }
400 
401 MachineBasicBlock::iterator
402 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
403                             MachineBasicBlock &MBB) const {
404   const int InstrDistanceThreshold = 5;
405   int InstrDistance = 1;
406   MachineBasicBlock::iterator CurInst = std::next(I);
407 
408   unsigned LEAOpcode = I->getOpcode();
409   unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
410   unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
411   Register DestReg = I->getOperand(0).getReg();
412 
413   while (CurInst != MBB.end()) {
414     if (CurInst->isCall() || CurInst->isInlineAsm())
415       break;
416     if (InstrDistance > InstrDistanceThreshold)
417       break;
418 
419     // Check if the lea dest register is used in an add/sub instruction only.
420     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
421       MachineOperand &Opnd = CurInst->getOperand(I);
422       if (Opnd.isReg()) {
423         if (Opnd.getReg() == DestReg) {
424           if (Opnd.isDef() || !Opnd.isKill())
425             return MachineBasicBlock::iterator();
426 
427           unsigned AluOpcode = CurInst->getOpcode();
428           if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
429             return MachineBasicBlock::iterator();
430 
431           MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
432           MachineOperand AluDest = CurInst->getOperand(0);
433           if (Opnd2.getReg() != AluDest.getReg())
434             return MachineBasicBlock::iterator();
435 
436           // X - (Y + Z) may generate different flags than (X - Y) - Z when
437           // there is overflow. So we can't change the alu instruction if the
438           // flags register is live.
439           if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
440             return MachineBasicBlock::iterator();
441 
442           return CurInst;
443         }
444         if (TRI->regsOverlap(DestReg, Opnd.getReg()))
445           return MachineBasicBlock::iterator();
446       }
447     }
448 
449     InstrDistance++;
450     ++CurInst;
451   }
452   return MachineBasicBlock::iterator();
453 }
454 
455 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
456                                  MachineBasicBlock::iterator &AluI,
457                                  bool &BaseIndexDef, bool &AluDestRef,
458                                  MachineOperand **KilledBase,
459                                  MachineOperand **KilledIndex) const {
460   BaseIndexDef = AluDestRef = false;
461   *KilledBase = *KilledIndex = nullptr;
462   Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
463   Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
464   Register AluDestReg = AluI->getOperand(0).getReg();
465 
466   MachineBasicBlock::iterator CurInst = std::next(LeaI);
467   while (CurInst != AluI) {
468     for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
469       MachineOperand &Opnd = CurInst->getOperand(I);
470       if (!Opnd.isReg())
471         continue;
472       Register Reg = Opnd.getReg();
473       if (TRI->regsOverlap(Reg, AluDestReg))
474         AluDestRef = true;
475       if (TRI->regsOverlap(Reg, BaseReg)) {
476         if (Opnd.isDef())
477           BaseIndexDef = true;
478         else if (Opnd.isKill())
479           *KilledBase = &Opnd;
480       }
481       if (TRI->regsOverlap(Reg, IndexReg)) {
482         if (Opnd.isDef())
483           BaseIndexDef = true;
484         else if (Opnd.isKill())
485           *KilledIndex = &Opnd;
486       }
487     }
488     ++CurInst;
489   }
490 }
491 
492 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
493                              MachineBasicBlock &MBB) const {
494   // Look for an add/sub instruction which uses the result of lea.
495   MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
496   if (AluI == MachineBasicBlock::iterator())
497     return false;
498 
499   // Check if there are any related register usage between lea and alu.
500   bool BaseIndexDef, AluDestRef;
501   MachineOperand *KilledBase, *KilledIndex;
502   checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
503 
504   MachineBasicBlock::iterator InsertPos = AluI;
505   if (BaseIndexDef) {
506     if (AluDestRef)
507       return false;
508     InsertPos = I;
509     KilledBase = KilledIndex = nullptr;
510   }
511 
512   // Check if there are same registers.
513   Register AluDestReg = AluI->getOperand(0).getReg();
514   Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
515   Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
516   if (I->getOpcode() == X86::LEA64_32r) {
517     BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
518     IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
519   }
520   if (AluDestReg == IndexReg) {
521     if (BaseReg == IndexReg)
522       return false;
523     std::swap(BaseReg, IndexReg);
524     std::swap(KilledBase, KilledIndex);
525   }
526   if (BaseReg == IndexReg)
527     KilledBase = nullptr;
528 
529   // Now it's safe to change instructions.
530   MachineInstr *NewMI1, *NewMI2;
531   unsigned NewOpcode = AluI->getOpcode();
532   NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
533                    AluDestReg)
534                .addReg(AluDestReg, RegState::Kill)
535                .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
536   NewMI1->addRegisterDead(X86::EFLAGS, TRI);
537   NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
538                    AluDestReg)
539                .addReg(AluDestReg, RegState::Kill)
540                .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
541   NewMI2->addRegisterDead(X86::EFLAGS, TRI);
542 
543   // Clear the old Kill flags.
544   if (KilledBase)
545     KilledBase->setIsKill(false);
546   if (KilledIndex)
547     KilledIndex->setIsKill(false);
548 
549   MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
550   MBB.erase(I);
551   MBB.erase(AluI);
552   I = NewMI1;
553   return true;
554 }
555 
556 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
557                                  MachineBasicBlock &MBB, bool OptIncDec,
558                                  bool UseLEAForSP) const {
559   MachineInstr &MI = *I;
560 
561   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
562   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
563   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
564   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
565   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
566 
567   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
568       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
569           MachineBasicBlock::LQR_Dead)
570     return false;
571 
572   Register DestReg = MI.getOperand(0).getReg();
573   Register BaseReg = Base.getReg();
574   Register IndexReg = Index.getReg();
575 
576   // Don't change stack adjustment LEAs.
577   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
578     return false;
579 
580   // LEA64_32 has 64-bit operands but 32-bit result.
581   if (MI.getOpcode() == X86::LEA64_32r) {
582     if (BaseReg != 0)
583       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
584     if (IndexReg != 0)
585       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
586   }
587 
588   MachineInstr *NewMI = nullptr;
589 
590   // Case 1.
591   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
592   // which can be turned into add %reg2, %reg1
593   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
594       (DestReg == BaseReg || DestReg == IndexReg)) {
595     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
596     if (DestReg != BaseReg)
597       std::swap(BaseReg, IndexReg);
598 
599     if (MI.getOpcode() == X86::LEA64_32r) {
600       // TODO: Do we need the super register implicit use?
601       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
602         .addReg(BaseReg).addReg(IndexReg)
603         .addReg(Base.getReg(), RegState::Implicit)
604         .addReg(Index.getReg(), RegState::Implicit);
605     } else {
606       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
607         .addReg(BaseReg).addReg(IndexReg);
608     }
609   } else if (DestReg == BaseReg && IndexReg == 0) {
610     // Case 2.
611     // This is an LEA with only a base register and a displacement,
612     // We can use ADDri or INC/DEC.
613 
614     // Does this LEA have one these forms:
615     // lea  %reg, 1(%reg)
616     // lea  %reg, -1(%reg)
617     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
618       bool IsINC = Disp.getImm() == 1;
619       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
620 
621       if (MI.getOpcode() == X86::LEA64_32r) {
622         // TODO: Do we need the super register implicit use?
623         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
624           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
625       } else {
626         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
627           .addReg(BaseReg);
628       }
629     } else {
630       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
631       if (MI.getOpcode() == X86::LEA64_32r) {
632         // TODO: Do we need the super register implicit use?
633         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
634           .addReg(BaseReg).addImm(Disp.getImm())
635           .addReg(Base.getReg(), RegState::Implicit);
636       } else {
637         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
638           .addReg(BaseReg).addImm(Disp.getImm());
639       }
640     }
641   } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
642     // Case 3.
643     // Look for and transform the sequence
644     //     lea (reg1, reg2), reg3
645     //     sub reg3, reg4
646     return optLEAALU(I, MBB);
647   } else
648     return false;
649 
650   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
651   MBB.erase(I);
652   I = NewMI;
653   return true;
654 }
655 
656 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
657                                       MachineBasicBlock &MBB) {
658   // Process a load, store, or LEA instruction.
659   MachineInstr &MI = *I;
660   const MCInstrDesc &Desc = MI.getDesc();
661   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
662   if (AddrOffset >= 0) {
663     AddrOffset += X86II::getOperandBias(Desc);
664     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
665     if (p.isReg() && p.getReg() != X86::ESP) {
666       seekLEAFixup(p, I, MBB);
667     }
668     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
669     if (q.isReg() && q.getReg() != X86::ESP) {
670       seekLEAFixup(q, I, MBB);
671     }
672   }
673 }
674 
675 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
676                                 MachineBasicBlock::iterator &I,
677                                 MachineBasicBlock &MBB) {
678   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
679   if (MBI != MachineBasicBlock::iterator()) {
680     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
681     if (NewMI) {
682       ++NumLEAs;
683       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
684       // now to replace with an equivalent LEA...
685       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
686       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
687       MBB.erase(MBI);
688       MachineBasicBlock::iterator J =
689           static_cast<MachineBasicBlock::iterator>(NewMI);
690       processInstruction(J, MBB);
691     }
692   }
693 }
694 
695 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
696                                                 MachineBasicBlock &MBB) {
697   MachineInstr &MI = *I;
698   const unsigned Opcode = MI.getOpcode();
699 
700   const MachineOperand &Dst =     MI.getOperand(0);
701   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
702   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
703   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
704   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
705   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
706 
707   if (Segment.getReg() != 0 || !Offset.isImm() ||
708       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
709           MachineBasicBlock::LQR_Dead)
710     return;
711   const Register DstR = Dst.getReg();
712   const Register SrcR1 = Base.getReg();
713   const Register SrcR2 = Index.getReg();
714   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
715     return;
716   if (Scale.getImm() > 1)
717     return;
718   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
719   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
720   MachineInstr *NewMI = nullptr;
721   // Make ADD instruction for two registers writing to LEA's destination
722   if (SrcR1 != 0 && SrcR2 != 0) {
723     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
724     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
725     NewMI =
726         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
727     LLVM_DEBUG(NewMI->dump(););
728   }
729   // Make ADD instruction for immediate
730   if (Offset.getImm() != 0) {
731     const MCInstrDesc &ADDri =
732         TII->get(getADDriFromLEA(Opcode, Offset));
733     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
734     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
735                 .add(SrcR)
736                 .addImm(Offset.getImm());
737     LLVM_DEBUG(NewMI->dump(););
738   }
739   if (NewMI) {
740     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
741     MBB.erase(I);
742     I = NewMI;
743   }
744 }
745 
746 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
747                                              MachineBasicBlock &MBB,
748                                              bool OptIncDec) {
749   MachineInstr &MI = *I;
750   const unsigned LEAOpcode = MI.getOpcode();
751 
752   const MachineOperand &Dest =    MI.getOperand(0);
753   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
754   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
755   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
756   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
757   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
758 
759   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
760       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
761           MachineBasicBlock::LQR_Dead ||
762       Segment.getReg() != X86::NoRegister)
763     return;
764 
765   Register DestReg = Dest.getReg();
766   Register BaseReg = Base.getReg();
767   Register IndexReg = Index.getReg();
768 
769   if (MI.getOpcode() == X86::LEA64_32r) {
770     if (BaseReg != 0)
771       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
772     if (IndexReg != 0)
773       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
774   }
775 
776   bool IsScale1 = Scale.getImm() == 1;
777   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
778   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
779 
780   // Skip these cases since it takes more than 2 instructions
781   // to replace the LEA instruction.
782   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
783     return;
784 
785   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
786   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
787 
788   MachineInstr *NewMI = nullptr;
789 
790   // First try to replace LEA with one or two (for the 3-op LEA case)
791   // add instructions:
792   // 1.lea (%base,%index,1), %base => add %index,%base
793   // 2.lea (%base,%index,1), %index => add %base,%index
794   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
795     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
796     if (DestReg != BaseReg)
797       std::swap(BaseReg, IndexReg);
798 
799     if (MI.getOpcode() == X86::LEA64_32r) {
800       // TODO: Do we need the super register implicit use?
801       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
802                   .addReg(BaseReg)
803                   .addReg(IndexReg)
804                   .addReg(Base.getReg(), RegState::Implicit)
805                   .addReg(Index.getReg(), RegState::Implicit);
806     } else {
807       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
808                   .addReg(BaseReg)
809                   .addReg(IndexReg);
810     }
811   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
812     // If the base is inefficient try switching the index and base operands,
813     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
814     // lea offset(%base,%index,scale),%dst =>
815     // lea (%base,%index,scale); add offset,%dst
816     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
817                 .add(Dest)
818                 .add(IsInefficientBase ? Index : Base)
819                 .add(Scale)
820                 .add(IsInefficientBase ? Base : Index)
821                 .addImm(0)
822                 .add(Segment);
823     LLVM_DEBUG(NewMI->dump(););
824   }
825 
826   // If either replacement succeeded above, add the offset if needed, then
827   // replace the instruction.
828   if (NewMI) {
829     // Create ADD instruction for the Offset in case of 3-Ops LEA.
830     if (hasLEAOffset(Offset)) {
831       if (OptIncDec && Offset.isImm() &&
832           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
833         unsigned NewOpc =
834             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
835         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
836                     .addReg(DestReg);
837         LLVM_DEBUG(NewMI->dump(););
838       } else {
839         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
840         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
841                     .addReg(DestReg)
842                     .add(Offset);
843         LLVM_DEBUG(NewMI->dump(););
844       }
845     }
846 
847     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
848     MBB.erase(I);
849     I = NewMI;
850     return;
851   }
852 
853   // Handle the rest of the cases with inefficient base register:
854   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
855   assert(IsInefficientBase && "efficient base should be handled already!");
856 
857   // FIXME: Handle LEA64_32r.
858   if (LEAOpcode == X86::LEA64_32r)
859     return;
860 
861   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
862   if (IsScale1 && !hasLEAOffset(Offset)) {
863     bool BIK = Base.isKill() && BaseReg != IndexReg;
864     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
865     LLVM_DEBUG(MI.getPrevNode()->dump(););
866 
867     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
868     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
869                 .addReg(DestReg)
870                 .add(Index);
871     LLVM_DEBUG(NewMI->dump(););
872 
873     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
874     MBB.erase(I);
875     I = NewMI;
876     return;
877   }
878 
879   // lea offset(%base,%index,scale), %dst =>
880   // lea offset( ,%index,scale), %dst; add %base,%dst
881   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
882               .add(Dest)
883               .addReg(0)
884               .add(Scale)
885               .add(Index)
886               .add(Offset)
887               .add(Segment);
888   LLVM_DEBUG(NewMI->dump(););
889 
890   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
891   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
892               .addReg(DestReg)
893               .add(Base);
894   LLVM_DEBUG(NewMI->dump(););
895 
896   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
897   MBB.erase(I);
898   I = NewMI;
899 }
900