1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29
// Human-readable description of the pass; returned by getPassName() and
// handed to INITIALIZE_PASS.
#define FIXUPLEA_DESC "X86 LEA Fixup"
// Name handed to INITIALIZE_PASS; also reused as DEBUG_TYPE below.
#define FIXUPLEA_NAME "x86-fixup-LEAs"

// Debug category for the LLVM_DEBUG output emitted in this file.
#define DEBUG_TYPE FIXUPLEA_NAME

// Incremented by seekLEAFixup() each time an instruction is rewritten as LEA.
STATISTIC(NumLEAs, "Number of LEA instructions created");
36
namespace {
/// Machine function pass that runs after register allocation and either
/// replaces LEA instructions with ADD/INC/DEC style sequences or rewrites
/// address-producing instructions as LEAs, depending on subtarget flags
/// queried in runOnMachineFunction().
class FixupLEAPass : public MachineFunctionPass {
  /// Result of usesRegister(): whether an instruction does not reference a
  /// register, defines it, or only reads it.
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
  /// On success \p I is updated to point at the last instruction created.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  /// \p OptIncDec allows a +1/-1 offset to be emitted as INC/DEC.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
  /// Returns true if the LEA was replaced, in which case \p I points at the
  /// replacement; returns false and leaves the block untouched otherwise.
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  /// A defining operand takes precedence: RU_Write is returned as soon as
  /// one is seen.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  /// Returns an iterator to the writing instruction, or a default-constructed
  /// iterator if none was found.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  /// The original instruction is not erased here; the caller does that.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  /// Pass identifier handed to the MachineFunctionPass constructor.
  static char ID;

  /// Returns the human-readable pass description (FIXUPLEA_DESC).
  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) { }

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  /// Declares the profile summary and lazy block frequency analyses that
  /// runOnMachineFunction() queries, then defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  // Scheduling model used by searchBackwards() to accumulate instruction
  // latencies; initialized per function in runOnMachineFunction().
  TargetSchedModel TSM;
  // Subtarget instruction and register info; both are set at the start of
  // runOnMachineFunction() and are null before that.
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
}
129
// Definition of the static pass identifier declared in FixupLEAPass; the
// constructor hands it to MachineFunctionPass.
char FixupLEAPass::ID = 0;

// Register the pass under FIXUPLEA_NAME with the description FIXUPLEA_DESC.
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
133
134 MachineInstr *
135 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
136 MachineBasicBlock::iterator &MBBI) const {
137 MachineInstr &MI = *MBBI;
138 switch (MI.getOpcode()) {
139 case X86::MOV32rr:
140 case X86::MOV64rr: {
141 const MachineOperand &Src = MI.getOperand(1);
142 const MachineOperand &Dest = MI.getOperand(0);
143 MachineInstr *NewMI =
144 BuildMI(MBB, MBBI, MI.getDebugLoc(),
145 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
146 : X86::LEA64r))
147 .add(Dest)
148 .add(Src)
149 .addImm(1)
150 .addReg(0)
151 .addImm(0)
152 .addReg(0);
153 return NewMI;
154 }
155 }
156
157 if (!MI.isConvertibleTo3Addr())
158 return nullptr;
159
160 switch (MI.getOpcode()) {
161 default:
162 // Only convert instructions that we've verified are safe.
163 return nullptr;
164 case X86::ADD64ri32:
165 case X86::ADD64ri8:
166 case X86::ADD64ri32_DB:
167 case X86::ADD64ri8_DB:
168 case X86::ADD32ri:
169 case X86::ADD32ri8:
170 case X86::ADD32ri_DB:
171 case X86::ADD32ri8_DB:
172 if (!MI.getOperand(2).isImm()) {
173 // convertToThreeAddress will call getImm()
174 // which requires isImm() to be true
175 return nullptr;
176 }
177 break;
178 case X86::SHL64ri:
179 case X86::SHL32ri:
180 case X86::INC64r:
181 case X86::INC32r:
182 case X86::DEC64r:
183 case X86::DEC32r:
184 case X86::ADD64rr:
185 case X86::ADD64rr_DB:
186 case X86::ADD32rr:
187 case X86::ADD32rr_DB:
188 // These instructions are all fine to convert.
189 break;
190 }
191 MachineFunction::iterator MFI = MBB.getIterator();
192 return TII->convertToThreeAddress(MFI, MI, nullptr);
193 }
194
createX86FixupLEAs()195 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
196
isLEA(unsigned Opcode)197 static bool isLEA(unsigned Opcode) {
198 return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
199 Opcode == X86::LEA64_32r;
200 }
201
runOnMachineFunction(MachineFunction & MF)202 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
203 if (skipFunction(MF.getFunction()))
204 return false;
205
206 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
207 bool IsSlowLEA = ST.slowLEA();
208 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
209 bool LEAUsesAG = ST.LEAusesAG();
210
211 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
212 bool UseLEAForSP = ST.useLeaForSP();
213
214 TSM.init(&ST);
215 TII = ST.getInstrInfo();
216 TRI = ST.getRegisterInfo();
217 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
218 auto *MBFI = (PSI && PSI->hasProfileSummary())
219 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
220 : nullptr;
221
222 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
223 for (MachineBasicBlock &MBB : MF) {
224 // First pass. Try to remove or optimize existing LEAs.
225 bool OptIncDecPerBB =
226 OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
227 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
228 if (!isLEA(I->getOpcode()))
229 continue;
230
231 if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
232 continue;
233
234 if (IsSlowLEA)
235 processInstructionForSlowLEA(I, MBB);
236 else if (IsSlow3OpsLEA)
237 processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
238 }
239
240 // Second pass for creating LEAs. This may reverse some of the
241 // transformations above.
242 if (LEAUsesAG) {
243 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
244 processInstruction(I, MBB);
245 }
246 }
247
248 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
249
250 return true;
251 }
252
253 FixupLEAPass::RegUsageState
usesRegister(MachineOperand & p,MachineBasicBlock::iterator I)254 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
255 RegUsageState RegUsage = RU_NotUsed;
256 MachineInstr &MI = *I;
257
258 for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
259 MachineOperand &opnd = MI.getOperand(i);
260 if (opnd.isReg() && opnd.getReg() == p.getReg()) {
261 if (opnd.isDef())
262 return RU_Write;
263 RegUsage = RU_Read;
264 }
265 }
266 return RegUsage;
267 }
268
269 /// getPreviousInstr - Given a reference to an instruction in a basic
270 /// block, return a reference to the previous instruction in the block,
271 /// wrapping around to the last instruction of the block if the block
272 /// branches to itself.
getPreviousInstr(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)273 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
274 MachineBasicBlock &MBB) {
275 if (I == MBB.begin()) {
276 if (MBB.isPredecessor(&MBB)) {
277 I = --MBB.end();
278 return true;
279 } else
280 return false;
281 }
282 --I;
283 return true;
284 }
285
286 MachineBasicBlock::iterator
searchBackwards(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)287 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
288 MachineBasicBlock &MBB) {
289 int InstrDistance = 1;
290 MachineBasicBlock::iterator CurInst;
291 static const int INSTR_DISTANCE_THRESHOLD = 5;
292
293 CurInst = I;
294 bool Found;
295 Found = getPreviousInstr(CurInst, MBB);
296 while (Found && I != CurInst) {
297 if (CurInst->isCall() || CurInst->isInlineAsm())
298 break;
299 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
300 break; // too far back to make a difference
301 if (usesRegister(p, CurInst) == RU_Write) {
302 return CurInst;
303 }
304 InstrDistance += TSM.computeInstrLatency(&*CurInst);
305 Found = getPreviousInstr(CurInst, MBB);
306 }
307 return MachineBasicBlock::iterator();
308 }
309
isInefficientLEAReg(unsigned Reg)310 static inline bool isInefficientLEAReg(unsigned Reg) {
311 return Reg == X86::EBP || Reg == X86::RBP ||
312 Reg == X86::R13D || Reg == X86::R13;
313 }
314
315 /// Returns true if this LEA uses base an index registers, and the base register
316 /// is known to be inefficient for the subtarget.
317 // TODO: use a variant scheduling class to model the latency profile
318 // of LEA instructions, and implement this logic as a scheduling predicate.
hasInefficientLEABaseReg(const MachineOperand & Base,const MachineOperand & Index)319 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
320 const MachineOperand &Index) {
321 return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
322 Index.getReg() != X86::NoRegister;
323 }
324
hasLEAOffset(const MachineOperand & Offset)325 static inline bool hasLEAOffset(const MachineOperand &Offset) {
326 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
327 }
328
getADDrrFromLEA(unsigned LEAOpcode)329 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
330 switch (LEAOpcode) {
331 default:
332 llvm_unreachable("Unexpected LEA instruction");
333 case X86::LEA32r:
334 case X86::LEA64_32r:
335 return X86::ADD32rr;
336 case X86::LEA64r:
337 return X86::ADD64rr;
338 }
339 }
340
getADDriFromLEA(unsigned LEAOpcode,const MachineOperand & Offset)341 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
342 const MachineOperand &Offset) {
343 bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
344 switch (LEAOpcode) {
345 default:
346 llvm_unreachable("Unexpected LEA instruction");
347 case X86::LEA32r:
348 case X86::LEA64_32r:
349 return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
350 case X86::LEA64r:
351 return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
352 }
353 }
354
getINCDECFromLEA(unsigned LEAOpcode,bool IsINC)355 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
356 switch (LEAOpcode) {
357 default:
358 llvm_unreachable("Unexpected LEA instruction");
359 case X86::LEA32r:
360 case X86::LEA64_32r:
361 return IsINC ? X86::INC32r : X86::DEC32r;
362 case X86::LEA64r:
363 return IsINC ? X86::INC64r : X86::DEC64r;
364 }
365 }
366
optTwoAddrLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec,bool UseLEAForSP) const367 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
368 MachineBasicBlock &MBB, bool OptIncDec,
369 bool UseLEAForSP) const {
370 MachineInstr &MI = *I;
371
372 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
373 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
374 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
375 const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
376 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
377
378 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
379 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
380 MachineBasicBlock::LQR_Dead)
381 return false;
382
383 Register DestReg = MI.getOperand(0).getReg();
384 Register BaseReg = Base.getReg();
385 Register IndexReg = Index.getReg();
386
387 // Don't change stack adjustment LEAs.
388 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
389 return false;
390
391 // LEA64_32 has 64-bit operands but 32-bit result.
392 if (MI.getOpcode() == X86::LEA64_32r) {
393 if (BaseReg != 0)
394 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
395 if (IndexReg != 0)
396 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
397 }
398
399 MachineInstr *NewMI = nullptr;
400
401 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
402 // which can be turned into add %reg2, %reg1
403 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
404 (DestReg == BaseReg || DestReg == IndexReg)) {
405 unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
406 if (DestReg != BaseReg)
407 std::swap(BaseReg, IndexReg);
408
409 if (MI.getOpcode() == X86::LEA64_32r) {
410 // TODO: Do we need the super register implicit use?
411 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
412 .addReg(BaseReg).addReg(IndexReg)
413 .addReg(Base.getReg(), RegState::Implicit)
414 .addReg(Index.getReg(), RegState::Implicit);
415 } else {
416 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
417 .addReg(BaseReg).addReg(IndexReg);
418 }
419 } else if (DestReg == BaseReg && IndexReg == 0) {
420 // This is an LEA with only a base register and a displacement,
421 // We can use ADDri or INC/DEC.
422
423 // Does this LEA have one these forms:
424 // lea %reg, 1(%reg)
425 // lea %reg, -1(%reg)
426 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
427 bool IsINC = Disp.getImm() == 1;
428 unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
429
430 if (MI.getOpcode() == X86::LEA64_32r) {
431 // TODO: Do we need the super register implicit use?
432 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
433 .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
434 } else {
435 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
436 .addReg(BaseReg);
437 }
438 } else {
439 unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
440 if (MI.getOpcode() == X86::LEA64_32r) {
441 // TODO: Do we need the super register implicit use?
442 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
443 .addReg(BaseReg).addImm(Disp.getImm())
444 .addReg(Base.getReg(), RegState::Implicit);
445 } else {
446 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
447 .addReg(BaseReg).addImm(Disp.getImm());
448 }
449 }
450 } else
451 return false;
452
453 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
454 MBB.erase(I);
455 I = NewMI;
456 return true;
457 }
458
processInstruction(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)459 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
460 MachineBasicBlock &MBB) {
461 // Process a load, store, or LEA instruction.
462 MachineInstr &MI = *I;
463 const MCInstrDesc &Desc = MI.getDesc();
464 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
465 if (AddrOffset >= 0) {
466 AddrOffset += X86II::getOperandBias(Desc);
467 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
468 if (p.isReg() && p.getReg() != X86::ESP) {
469 seekLEAFixup(p, I, MBB);
470 }
471 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
472 if (q.isReg() && q.getReg() != X86::ESP) {
473 seekLEAFixup(q, I, MBB);
474 }
475 }
476 }
477
seekLEAFixup(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)478 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
479 MachineBasicBlock::iterator &I,
480 MachineBasicBlock &MBB) {
481 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
482 if (MBI != MachineBasicBlock::iterator()) {
483 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
484 if (NewMI) {
485 ++NumLEAs;
486 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
487 // now to replace with an equivalent LEA...
488 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
489 MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
490 MBB.erase(MBI);
491 MachineBasicBlock::iterator J =
492 static_cast<MachineBasicBlock::iterator>(NewMI);
493 processInstruction(J, MBB);
494 }
495 }
496 }
497
processInstructionForSlowLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)498 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
499 MachineBasicBlock &MBB) {
500 MachineInstr &MI = *I;
501 const unsigned Opcode = MI.getOpcode();
502
503 const MachineOperand &Dst = MI.getOperand(0);
504 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
505 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
506 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
507 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
508 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
509
510 if (Segment.getReg() != 0 || !Offset.isImm() ||
511 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
512 MachineBasicBlock::LQR_Dead)
513 return;
514 const Register DstR = Dst.getReg();
515 const Register SrcR1 = Base.getReg();
516 const Register SrcR2 = Index.getReg();
517 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
518 return;
519 if (Scale.getImm() > 1)
520 return;
521 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
522 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
523 MachineInstr *NewMI = nullptr;
524 // Make ADD instruction for two registers writing to LEA's destination
525 if (SrcR1 != 0 && SrcR2 != 0) {
526 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
527 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
528 NewMI =
529 BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
530 LLVM_DEBUG(NewMI->dump(););
531 }
532 // Make ADD instruction for immediate
533 if (Offset.getImm() != 0) {
534 const MCInstrDesc &ADDri =
535 TII->get(getADDriFromLEA(Opcode, Offset));
536 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
537 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
538 .add(SrcR)
539 .addImm(Offset.getImm());
540 LLVM_DEBUG(NewMI->dump(););
541 }
542 if (NewMI) {
543 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
544 MBB.erase(I);
545 I = NewMI;
546 }
547 }
548
processInstrForSlow3OpLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec)549 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
550 MachineBasicBlock &MBB,
551 bool OptIncDec) {
552 MachineInstr &MI = *I;
553 const unsigned LEAOpcode = MI.getOpcode();
554
555 const MachineOperand &Dest = MI.getOperand(0);
556 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
557 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
558 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
559 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
560 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
561
562 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
563 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
564 MachineBasicBlock::LQR_Dead ||
565 Segment.getReg() != X86::NoRegister)
566 return;
567
568 Register DestReg = Dest.getReg();
569 Register BaseReg = Base.getReg();
570 Register IndexReg = Index.getReg();
571
572 if (MI.getOpcode() == X86::LEA64_32r) {
573 if (BaseReg != 0)
574 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
575 if (IndexReg != 0)
576 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
577 }
578
579 bool IsScale1 = Scale.getImm() == 1;
580 bool IsInefficientBase = isInefficientLEAReg(BaseReg);
581 bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
582
583 // Skip these cases since it takes more than 2 instructions
584 // to replace the LEA instruction.
585 if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
586 return;
587
588 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
589 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
590
591 MachineInstr *NewMI = nullptr;
592
593 // First try to replace LEA with one or two (for the 3-op LEA case)
594 // add instructions:
595 // 1.lea (%base,%index,1), %base => add %index,%base
596 // 2.lea (%base,%index,1), %index => add %base,%index
597 if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
598 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
599 if (DestReg != BaseReg)
600 std::swap(BaseReg, IndexReg);
601
602 if (MI.getOpcode() == X86::LEA64_32r) {
603 // TODO: Do we need the super register implicit use?
604 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
605 .addReg(BaseReg)
606 .addReg(IndexReg)
607 .addReg(Base.getReg(), RegState::Implicit)
608 .addReg(Index.getReg(), RegState::Implicit);
609 } else {
610 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
611 .addReg(BaseReg)
612 .addReg(IndexReg);
613 }
614 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
615 // If the base is inefficient try switching the index and base operands,
616 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
617 // lea offset(%base,%index,scale),%dst =>
618 // lea (%base,%index,scale); add offset,%dst
619 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
620 .add(Dest)
621 .add(IsInefficientBase ? Index : Base)
622 .add(Scale)
623 .add(IsInefficientBase ? Base : Index)
624 .addImm(0)
625 .add(Segment);
626 LLVM_DEBUG(NewMI->dump(););
627 }
628
629 // If either replacement succeeded above, add the offset if needed, then
630 // replace the instruction.
631 if (NewMI) {
632 // Create ADD instruction for the Offset in case of 3-Ops LEA.
633 if (hasLEAOffset(Offset)) {
634 if (OptIncDec && Offset.isImm() &&
635 (Offset.getImm() == 1 || Offset.getImm() == -1)) {
636 unsigned NewOpc =
637 getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
638 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
639 .addReg(DestReg);
640 LLVM_DEBUG(NewMI->dump(););
641 } else {
642 unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
643 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
644 .addReg(DestReg)
645 .add(Offset);
646 LLVM_DEBUG(NewMI->dump(););
647 }
648 }
649
650 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
651 MBB.erase(I);
652 I = NewMI;
653 return;
654 }
655
656 // Handle the rest of the cases with inefficient base register:
657 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
658 assert(IsInefficientBase && "efficient base should be handled already!");
659
660 // FIXME: Handle LEA64_32r.
661 if (LEAOpcode == X86::LEA64_32r)
662 return;
663
664 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
665 if (IsScale1 && !hasLEAOffset(Offset)) {
666 bool BIK = Base.isKill() && BaseReg != IndexReg;
667 TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
668 LLVM_DEBUG(MI.getPrevNode()->dump(););
669
670 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
671 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
672 .addReg(DestReg)
673 .add(Index);
674 LLVM_DEBUG(NewMI->dump(););
675
676 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
677 MBB.erase(I);
678 I = NewMI;
679 return;
680 }
681
682 // lea offset(%base,%index,scale), %dst =>
683 // lea offset( ,%index,scale), %dst; add %base,%dst
684 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
685 .add(Dest)
686 .addReg(0)
687 .add(Scale)
688 .add(Index)
689 .add(Offset)
690 .add(Segment);
691 LLVM_DEBUG(NewMI->dump(););
692
693 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
694 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
695 .addReg(DestReg)
696 .add(Base);
697 LLVM_DEBUG(NewMI->dump(););
698
699 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
700 MBB.erase(I);
701 I = NewMI;
702 }
703