1 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion on machine instructions. We
10 // attempt to remove as much code from the body of a loop as possible.
11 //
12 // This pass is not intended to be a replacement or a complete alternative
13 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14 // constructs that are not exposed before lowering and instruction selection.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
27 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineLoopInfo.h"
33 #include "llvm/CodeGen/MachineMemOperand.h"
34 #include "llvm/CodeGen/MachineOperand.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/PseudoSourceValue.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetLowering.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/TargetSchedule.h"
41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/MC/MCInstrDesc.h"
45 #include "llvm/MC/MCRegister.h"
46 #include "llvm/MC/MCRegisterInfo.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <limits>
55 #include <vector>
56
57 using namespace llvm;
58
59 #define DEBUG_TYPE "machinelicm"
60
61 static cl::opt<bool>
62 AvoidSpeculation("avoid-speculation",
63 cl::desc("MachineLICM should avoid speculation"),
64 cl::init(true), cl::Hidden);
65
66 static cl::opt<bool>
67 HoistCheapInsts("hoist-cheap-insts",
68 cl::desc("MachineLICM should hoist even cheap instructions"),
69 cl::init(false), cl::Hidden);
70
71 static cl::opt<bool>
72 HoistConstStores("hoist-const-stores",
73 cl::desc("Hoist invariant stores"),
74 cl::init(true), cl::Hidden);
75 // The default threshold of 100 (i.e. if target block is 100 times hotter)
76 // is based on empirical data on a single target and is subject to tuning.
77 static cl::opt<unsigned>
78 BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
79 cl::desc("Do not hoist instructions if target"
80 "block is N times hotter than the source."),
81 cl::init(100), cl::Hidden);
82
83 enum class UseBFI { None, PGO, All };
84
85 static cl::opt<UseBFI>
86 DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
87 cl::desc("Disable hoisting instructions to"
88 " hotter blocks"),
89 cl::init(UseBFI::PGO), cl::Hidden,
90 cl::values(clEnumValN(UseBFI::None, "none",
91 "disable the feature"),
92 clEnumValN(UseBFI::PGO, "pgo",
93 "enable the feature when using profile data"),
94 clEnumValN(UseBFI::All, "all",
95 "enable the feature with/wo profile data")));
96
97 STATISTIC(NumHoisted,
98 "Number of machine instructions hoisted out of loops");
99 STATISTIC(NumLowRP,
100 "Number of instructions hoisted in low reg pressure situation");
101 STATISTIC(NumHighLatency,
102 "Number of high latency instructions hoisted");
103 STATISTIC(NumCSEed,
104 "Number of hoisted machine instructions CSEed");
105 STATISTIC(NumPostRAHoisted,
106 "Number of machine instructions hoisted out of loops post regalloc");
107 STATISTIC(NumStoreConst,
108 "Number of stores of const phys reg hoisted out of loops");
109 STATISTIC(NumNotHoistedDueToHotness,
110 "Number of instructions not hoisted due to block frequency");
111
112 namespace {
113
114 class MachineLICMBase : public MachineFunctionPass {
115 const TargetInstrInfo *TII;
116 const TargetLoweringBase *TLI;
117 const TargetRegisterInfo *TRI;
118 const MachineFrameInfo *MFI;
119 MachineRegisterInfo *MRI;
120 TargetSchedModel SchedModel;
121 bool PreRegAlloc;
122 bool HasProfileData;
123
124 // Various analyses that we use...
125 AliasAnalysis *AA; // Alias analysis info.
126 MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info
127 MachineLoopInfo *MLI; // Current MachineLoopInfo
128 MachineDominatorTree *DT; // Machine dominator tree for the cur loop
129
130 // State that is updated as we process loops
131 bool Changed; // True if a loop is changed.
132 bool FirstInLoop; // True if it's the first LICM in the loop.
133 MachineLoop *CurLoop; // The current loop we are working on.
134 MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
135
136 // Exit blocks for CurLoop.
137 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
138
isExitBlock(const MachineBasicBlock * MBB) const139 bool isExitBlock(const MachineBasicBlock *MBB) const {
140 return is_contained(ExitBlocks, MBB);
141 }
142
143 // Track 'estimated' register pressure.
144 SmallSet<Register, 32> RegSeen;
145 SmallVector<unsigned, 8> RegPressure;
146
147 // Register pressure "limit" per register pressure set. If the pressure
148 // is higher than the limit, then it's considered high.
149 SmallVector<unsigned, 8> RegLimit;
150
151 // Register pressure on path leading from loop preheader to current BB.
152 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
153
154 // For each opcode, keep a list of potential CSE instructions.
155 DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
156
157 enum {
158 SpeculateFalse = 0,
159 SpeculateTrue = 1,
160 SpeculateUnknown = 2
161 };
162
163 // If a MBB does not dominate loop exiting blocks then it may not safe
164 // to hoist loads from this block.
165 // Tri-state: 0 - false, 1 - true, 2 - unknown
166 unsigned SpeculationState;
167
168 public:
MachineLICMBase(char & PassID,bool PreRegAlloc)169 MachineLICMBase(char &PassID, bool PreRegAlloc)
170 : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
171
172 bool runOnMachineFunction(MachineFunction &MF) override;
173
getAnalysisUsage(AnalysisUsage & AU) const174 void getAnalysisUsage(AnalysisUsage &AU) const override {
175 AU.addRequired<MachineLoopInfo>();
176 if (DisableHoistingToHotterBlocks != UseBFI::None)
177 AU.addRequired<MachineBlockFrequencyInfo>();
178 AU.addRequired<MachineDominatorTree>();
179 AU.addRequired<AAResultsWrapperPass>();
180 AU.addPreserved<MachineLoopInfo>();
181 MachineFunctionPass::getAnalysisUsage(AU);
182 }
183
releaseMemory()184 void releaseMemory() override {
185 RegSeen.clear();
186 RegPressure.clear();
187 RegLimit.clear();
188 BackTrace.clear();
189 CSEMap.clear();
190 }
191
192 private:
193 /// Keep track of information about hoisting candidates.
194 struct CandidateInfo {
195 MachineInstr *MI;
196 unsigned Def;
197 int FI;
198
CandidateInfo__anond79774340111::MachineLICMBase::CandidateInfo199 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
200 : MI(mi), Def(def), FI(fi) {}
201 };
202
203 void HoistRegionPostRA();
204
205 void HoistPostRA(MachineInstr *MI, unsigned Def);
206
207 void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
208 BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
209 SmallVectorImpl<CandidateInfo> &Candidates);
210
211 void AddToLiveIns(MCRegister Reg);
212
213 bool IsLICMCandidate(MachineInstr &I);
214
215 bool IsLoopInvariantInst(MachineInstr &I);
216
217 bool HasLoopPHIUse(const MachineInstr *MI) const;
218
219 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
220 Register Reg) const;
221
222 bool IsCheapInstruction(MachineInstr &MI) const;
223
224 bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
225 bool Cheap);
226
227 void UpdateBackTraceRegPressure(const MachineInstr *MI);
228
229 bool IsProfitableToHoist(MachineInstr &MI);
230
231 bool IsGuaranteedToExecute(MachineBasicBlock *BB);
232
233 void EnterScope(MachineBasicBlock *MBB);
234
235 void ExitScope(MachineBasicBlock *MBB);
236
237 void ExitScopeIfDone(
238 MachineDomTreeNode *Node,
239 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
240 DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
241
242 void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
243
244 void InitRegPressure(MachineBasicBlock *BB);
245
246 DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
247 bool ConsiderSeen,
248 bool ConsiderUnseenAsDef);
249
250 void UpdateRegPressure(const MachineInstr *MI,
251 bool ConsiderUnseenAsDef = false);
252
253 MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
254
255 MachineInstr *LookForDuplicate(const MachineInstr *MI,
256 std::vector<MachineInstr *> &PrevMIs);
257
258 bool
259 EliminateCSE(MachineInstr *MI,
260 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
261
262 bool MayCSE(MachineInstr *MI);
263
264 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
265
266 void InitCSEMap(MachineBasicBlock *BB);
267
268 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
269 MachineBasicBlock *TgtBlock);
270 MachineBasicBlock *getCurPreheader();
271 };
272
273 class MachineLICM : public MachineLICMBase {
274 public:
275 static char ID;
MachineLICM()276 MachineLICM() : MachineLICMBase(ID, false) {
277 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
278 }
279 };
280
281 class EarlyMachineLICM : public MachineLICMBase {
282 public:
283 static char ID;
EarlyMachineLICM()284 EarlyMachineLICM() : MachineLICMBase(ID, true) {
285 initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
286 }
287 };
288
289 } // end anonymous namespace
290
291 char MachineLICM::ID;
292 char EarlyMachineLICM::ID;
293
294 char &llvm::MachineLICMID = MachineLICM::ID;
295 char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
296
297 INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
298 "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)299 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
300 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
301 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
302 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
303 INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
304 "Machine Loop Invariant Code Motion", false, false)
305
306 INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
307 "Early Machine Loop Invariant Code Motion", false, false)
308 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
309 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
310 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
311 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
312 INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
313 "Early Machine Loop Invariant Code Motion", false, false)
314
315 /// Test if the given loop is the outer-most loop that has a unique predecessor.
316 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
317 // Check whether this loop even has a unique predecessor.
318 if (!CurLoop->getLoopPredecessor())
319 return false;
320 // Ok, now check to see if any of its outer loops do.
321 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
322 if (L->getLoopPredecessor())
323 return false;
324 // None of them did, so this is the outermost with a unique predecessor.
325 return true;
326 }
327
runOnMachineFunction(MachineFunction & MF)328 bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
329 if (skipFunction(MF.getFunction()))
330 return false;
331
332 Changed = FirstInLoop = false;
333 const TargetSubtargetInfo &ST = MF.getSubtarget();
334 TII = ST.getInstrInfo();
335 TLI = ST.getTargetLowering();
336 TRI = ST.getRegisterInfo();
337 MFI = &MF.getFrameInfo();
338 MRI = &MF.getRegInfo();
339 SchedModel.init(&ST);
340
341 PreRegAlloc = MRI->isSSA();
342 HasProfileData = MF.getFunction().hasProfileData();
343
344 if (PreRegAlloc)
345 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
346 else
347 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
348 LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
349
350 if (PreRegAlloc) {
351 // Estimate register pressure during pre-regalloc pass.
352 unsigned NumRPS = TRI->getNumRegPressureSets();
353 RegPressure.resize(NumRPS);
354 std::fill(RegPressure.begin(), RegPressure.end(), 0);
355 RegLimit.resize(NumRPS);
356 for (unsigned i = 0, e = NumRPS; i != e; ++i)
357 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
358 }
359
360 // Get our Loop information...
361 if (DisableHoistingToHotterBlocks != UseBFI::None)
362 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
363 MLI = &getAnalysis<MachineLoopInfo>();
364 DT = &getAnalysis<MachineDominatorTree>();
365 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
366
367 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
368 while (!Worklist.empty()) {
369 CurLoop = Worklist.pop_back_val();
370 CurPreheader = nullptr;
371 ExitBlocks.clear();
372
373 // If this is done before regalloc, only visit outer-most preheader-sporting
374 // loops.
375 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
376 Worklist.append(CurLoop->begin(), CurLoop->end());
377 continue;
378 }
379
380 CurLoop->getExitBlocks(ExitBlocks);
381
382 if (!PreRegAlloc)
383 HoistRegionPostRA();
384 else {
385 // CSEMap is initialized for loop header when the first instruction is
386 // being hoisted.
387 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
388 FirstInLoop = true;
389 HoistOutOfLoop(N);
390 CSEMap.clear();
391 }
392 }
393
394 return Changed;
395 }
396
397 /// Return true if instruction stores to the specified frame.
InstructionStoresToFI(const MachineInstr * MI,int FI)398 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
399 // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
400 // true since they have no memory operands.
401 if (!MI->mayStore())
402 return false;
403 // If we lost memory operands, conservatively assume that the instruction
404 // writes to all slots.
405 if (MI->memoperands_empty())
406 return true;
407 for (const MachineMemOperand *MemOp : MI->memoperands()) {
408 if (!MemOp->isStore() || !MemOp->getPseudoValue())
409 continue;
410 if (const FixedStackPseudoSourceValue *Value =
411 dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
412 if (Value->getFrameIndex() == FI)
413 return true;
414 }
415 }
416 return false;
417 }
418
419 /// Examine the instruction for potentai LICM candidate. Also
420 /// gather register def and frame object update information.
ProcessMI(MachineInstr * MI,BitVector & PhysRegDefs,BitVector & PhysRegClobbers,SmallSet<int,32> & StoredFIs,SmallVectorImpl<CandidateInfo> & Candidates)421 void MachineLICMBase::ProcessMI(MachineInstr *MI,
422 BitVector &PhysRegDefs,
423 BitVector &PhysRegClobbers,
424 SmallSet<int, 32> &StoredFIs,
425 SmallVectorImpl<CandidateInfo> &Candidates) {
426 bool RuledOut = false;
427 bool HasNonInvariantUse = false;
428 unsigned Def = 0;
429 for (const MachineOperand &MO : MI->operands()) {
430 if (MO.isFI()) {
431 // Remember if the instruction stores to the frame index.
432 int FI = MO.getIndex();
433 if (!StoredFIs.count(FI) &&
434 MFI->isSpillSlotObjectIndex(FI) &&
435 InstructionStoresToFI(MI, FI))
436 StoredFIs.insert(FI);
437 HasNonInvariantUse = true;
438 continue;
439 }
440
441 // We can't hoist an instruction defining a physreg that is clobbered in
442 // the loop.
443 if (MO.isRegMask()) {
444 PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
445 continue;
446 }
447
448 if (!MO.isReg())
449 continue;
450 Register Reg = MO.getReg();
451 if (!Reg)
452 continue;
453 assert(Register::isPhysicalRegister(Reg) &&
454 "Not expecting virtual register!");
455
456 if (!MO.isDef()) {
457 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
458 // If it's using a non-loop-invariant register, then it's obviously not
459 // safe to hoist.
460 HasNonInvariantUse = true;
461 continue;
462 }
463
464 if (MO.isImplicit()) {
465 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
466 PhysRegClobbers.set(*AI);
467 if (!MO.isDead())
468 // Non-dead implicit def? This cannot be hoisted.
469 RuledOut = true;
470 // No need to check if a dead implicit def is also defined by
471 // another instruction.
472 continue;
473 }
474
475 // FIXME: For now, avoid instructions with multiple defs, unless
476 // it's a dead implicit def.
477 if (Def)
478 RuledOut = true;
479 else
480 Def = Reg;
481
482 // If we have already seen another instruction that defines the same
483 // register, then this is not safe. Two defs is indicated by setting a
484 // PhysRegClobbers bit.
485 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
486 if (PhysRegDefs.test(*AS))
487 PhysRegClobbers.set(*AS);
488 }
489 // Need a second loop because MCRegAliasIterator can visit the same
490 // register twice.
491 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
492 PhysRegDefs.set(*AS);
493
494 if (PhysRegClobbers.test(Reg))
495 // MI defined register is seen defined by another instruction in
496 // the loop, it cannot be a LICM candidate.
497 RuledOut = true;
498 }
499
500 // Only consider reloads for now and remats which do not have register
501 // operands. FIXME: Consider unfold load folding instructions.
502 if (Def && !RuledOut) {
503 int FI = std::numeric_limits<int>::min();
504 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
505 (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
506 Candidates.push_back(CandidateInfo(MI, Def, FI));
507 }
508 }
509
510 /// Walk the specified region of the CFG and hoist loop invariants out to the
511 /// preheader.
HoistRegionPostRA()512 void MachineLICMBase::HoistRegionPostRA() {
513 MachineBasicBlock *Preheader = getCurPreheader();
514 if (!Preheader)
515 return;
516
517 unsigned NumRegs = TRI->getNumRegs();
518 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
519 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
520
521 SmallVector<CandidateInfo, 32> Candidates;
522 SmallSet<int, 32> StoredFIs;
523
524 // Walk the entire region, count number of defs for each register, and
525 // collect potential LICM candidates.
526 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
527 // If the header of the loop containing this basic block is a landing pad,
528 // then don't try to hoist instructions out of this loop.
529 const MachineLoop *ML = MLI->getLoopFor(BB);
530 if (ML && ML->getHeader()->isEHPad()) continue;
531
532 // Conservatively treat live-in's as an external def.
533 // FIXME: That means a reload that're reused in successor block(s) will not
534 // be LICM'ed.
535 for (const auto &LI : BB->liveins()) {
536 for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
537 PhysRegDefs.set(*AI);
538 }
539
540 SpeculationState = SpeculateUnknown;
541 for (MachineInstr &MI : *BB)
542 ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
543 }
544
545 // Gather the registers read / clobbered by the terminator.
546 BitVector TermRegs(NumRegs);
547 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
548 if (TI != Preheader->end()) {
549 for (const MachineOperand &MO : TI->operands()) {
550 if (!MO.isReg())
551 continue;
552 Register Reg = MO.getReg();
553 if (!Reg)
554 continue;
555 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
556 TermRegs.set(*AI);
557 }
558 }
559
560 // Now evaluate whether the potential candidates qualify.
561 // 1. Check if the candidate defined register is defined by another
562 // instruction in the loop.
563 // 2. If the candidate is a load from stack slot (always true for now),
564 // check if the slot is stored anywhere in the loop.
565 // 3. Make sure candidate def should not clobber
566 // registers read by the terminator. Similarly its def should not be
567 // clobbered by the terminator.
568 for (CandidateInfo &Candidate : Candidates) {
569 if (Candidate.FI != std::numeric_limits<int>::min() &&
570 StoredFIs.count(Candidate.FI))
571 continue;
572
573 unsigned Def = Candidate.Def;
574 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
575 bool Safe = true;
576 MachineInstr *MI = Candidate.MI;
577 for (const MachineOperand &MO : MI->operands()) {
578 if (!MO.isReg() || MO.isDef() || !MO.getReg())
579 continue;
580 Register Reg = MO.getReg();
581 if (PhysRegDefs.test(Reg) ||
582 PhysRegClobbers.test(Reg)) {
583 // If it's using a non-loop-invariant register, then it's obviously
584 // not safe to hoist.
585 Safe = false;
586 break;
587 }
588 }
589 if (Safe)
590 HoistPostRA(MI, Candidate.Def);
591 }
592 }
593 }
594
595 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
596 /// sure it is not killed by any instructions in the loop.
AddToLiveIns(MCRegister Reg)597 void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
598 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
599 if (!BB->isLiveIn(Reg))
600 BB->addLiveIn(Reg);
601 for (MachineInstr &MI : *BB) {
602 for (MachineOperand &MO : MI.operands()) {
603 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
604 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
605 MO.setIsKill(false);
606 }
607 }
608 }
609 }
610
611 /// When an instruction is found to only use loop invariant operands that is
612 /// safe to hoist, this instruction is called to do the dirty work.
HoistPostRA(MachineInstr * MI,unsigned Def)613 void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
614 MachineBasicBlock *Preheader = getCurPreheader();
615
616 // Now move the instructions to the predecessor, inserting it before any
617 // terminator instructions.
618 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
619 << " from " << printMBBReference(*MI->getParent()) << ": "
620 << *MI);
621
622 // Splice the instruction to the preheader.
623 MachineBasicBlock *MBB = MI->getParent();
624 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
625
626 // Since we are moving the instruction out of its basic block, we do not
627 // retain its debug location. Doing so would degrade the debugging
628 // experience and adversely affect the accuracy of profiling information.
629 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
630 MI->setDebugLoc(DebugLoc());
631
632 // Add register to livein list to all the BBs in the current loop since a
633 // loop invariant must be kept live throughout the whole loop. This is
634 // important to ensure later passes do not scavenge the def register.
635 AddToLiveIns(Def);
636
637 ++NumPostRAHoisted;
638 Changed = true;
639 }
640
641 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb
642 /// may not be safe to hoist.
IsGuaranteedToExecute(MachineBasicBlock * BB)643 bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
644 if (SpeculationState != SpeculateUnknown)
645 return SpeculationState == SpeculateFalse;
646
647 if (BB != CurLoop->getHeader()) {
648 // Check loop exiting blocks.
649 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
650 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
651 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
652 if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
653 SpeculationState = SpeculateTrue;
654 return false;
655 }
656 }
657
658 SpeculationState = SpeculateFalse;
659 return true;
660 }
661
EnterScope(MachineBasicBlock * MBB)662 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
663 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
664
665 // Remember livein register pressure.
666 BackTrace.push_back(RegPressure);
667 }
668
ExitScope(MachineBasicBlock * MBB)669 void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
670 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
671 BackTrace.pop_back();
672 }
673
674 /// Destroy scope for the MBB that corresponds to the given dominator tree node
675 /// if its a leaf or all of its children are done. Walk up the dominator tree to
676 /// destroy ancestors which are now done.
ExitScopeIfDone(MachineDomTreeNode * Node,DenseMap<MachineDomTreeNode *,unsigned> & OpenChildren,DenseMap<MachineDomTreeNode *,MachineDomTreeNode * > & ParentMap)677 void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
678 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
679 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
680 if (OpenChildren[Node])
681 return;
682
683 // Pop scope.
684 ExitScope(Node->getBlock());
685
686 // Now traverse upwards to pop ancestors whose offsprings are all done.
687 while (MachineDomTreeNode *Parent = ParentMap[Node]) {
688 unsigned Left = --OpenChildren[Parent];
689 if (Left != 0)
690 break;
691 ExitScope(Parent->getBlock());
692 Node = Parent;
693 }
694 }
695
696 /// Walk the specified loop in the CFG (defined by all blocks dominated by the
697 /// specified header block, and that are in the current loop) in depth first
698 /// order w.r.t the DominatorTree. This allows us to visit definitions before
699 /// uses, allowing us to hoist a loop body in one pass without iteration.
HoistOutOfLoop(MachineDomTreeNode * HeaderN)700 void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
701 MachineBasicBlock *Preheader = getCurPreheader();
702 if (!Preheader)
703 return;
704
705 SmallVector<MachineDomTreeNode*, 32> Scopes;
706 SmallVector<MachineDomTreeNode*, 8> WorkList;
707 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
708 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
709
710 // Perform a DFS walk to determine the order of visit.
711 WorkList.push_back(HeaderN);
712 while (!WorkList.empty()) {
713 MachineDomTreeNode *Node = WorkList.pop_back_val();
714 assert(Node && "Null dominator tree node?");
715 MachineBasicBlock *BB = Node->getBlock();
716
717 // If the header of the loop containing this basic block is a landing pad,
718 // then don't try to hoist instructions out of this loop.
719 const MachineLoop *ML = MLI->getLoopFor(BB);
720 if (ML && ML->getHeader()->isEHPad())
721 continue;
722
723 // If this subregion is not in the top level loop at all, exit.
724 if (!CurLoop->contains(BB))
725 continue;
726
727 Scopes.push_back(Node);
728 unsigned NumChildren = Node->getNumChildren();
729
730 // Don't hoist things out of a large switch statement. This often causes
731 // code to be hoisted that wasn't going to be executed, and increases
732 // register pressure in a situation where it's likely to matter.
733 if (BB->succ_size() >= 25)
734 NumChildren = 0;
735
736 OpenChildren[Node] = NumChildren;
737 if (NumChildren) {
738 // Add children in reverse order as then the next popped worklist node is
739 // the first child of this node. This means we ultimately traverse the
740 // DOM tree in exactly the same order as if we'd recursed.
741 for (MachineDomTreeNode *Child : reverse(Node->children())) {
742 ParentMap[Child] = Node;
743 WorkList.push_back(Child);
744 }
745 }
746 }
747
748 if (Scopes.size() == 0)
749 return;
750
751 // Compute registers which are livein into the loop headers.
752 RegSeen.clear();
753 BackTrace.clear();
754 InitRegPressure(Preheader);
755
756 // Now perform LICM.
757 for (MachineDomTreeNode *Node : Scopes) {
758 MachineBasicBlock *MBB = Node->getBlock();
759
760 EnterScope(MBB);
761
762 // Process the block
763 SpeculationState = SpeculateUnknown;
764 for (MachineBasicBlock::iterator
765 MII = MBB->begin(), E = MBB->end(); MII != E; ) {
766 MachineBasicBlock::iterator NextMII = MII; ++NextMII;
767 MachineInstr *MI = &*MII;
768 if (!Hoist(MI, Preheader))
769 UpdateRegPressure(MI);
770 // If we have hoisted an instruction that may store, it can only be a
771 // constant store.
772 MII = NextMII;
773 }
774
775 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
776 ExitScopeIfDone(Node, OpenChildren, ParentMap);
777 }
778 }
779
isOperandKill(const MachineOperand & MO,MachineRegisterInfo * MRI)780 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
781 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
782 }
783
784 /// Find all virtual register references that are liveout of the preheader to
785 /// initialize the starting "register pressure". Note this does not count live
786 /// through (livein but not used) registers.
InitRegPressure(MachineBasicBlock * BB)787 void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
788 std::fill(RegPressure.begin(), RegPressure.end(), 0);
789
790 // If the preheader has only a single predecessor and it ends with a
791 // fallthrough or an unconditional branch, then scan its predecessor for live
792 // defs as well. This happens whenever the preheader is created by splitting
793 // the critical edge from the loop predecessor to the loop header.
794 if (BB->pred_size() == 1) {
795 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
796 SmallVector<MachineOperand, 4> Cond;
797 if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
798 InitRegPressure(*BB->pred_begin());
799 }
800
801 for (const MachineInstr &MI : *BB)
802 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
803 }
804
805 /// Update estimate of register pressure after the specified instruction.
UpdateRegPressure(const MachineInstr * MI,bool ConsiderUnseenAsDef)806 void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
807 bool ConsiderUnseenAsDef) {
808 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
809 for (const auto &RPIdAndCost : Cost) {
810 unsigned Class = RPIdAndCost.first;
811 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
812 RegPressure[Class] = 0;
813 else
814 RegPressure[Class] += RPIdAndCost.second;
815 }
816 }
817
818 /// Calculate the additional register pressure that the registers used in MI
819 /// cause.
820 ///
821 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
822 /// figure out which usages are live-ins.
823 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
824 DenseMap<unsigned, int>
calcRegisterCost(const MachineInstr * MI,bool ConsiderSeen,bool ConsiderUnseenAsDef)825 MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
826 bool ConsiderUnseenAsDef) {
827 DenseMap<unsigned, int> Cost;
828 if (MI->isImplicitDef())
829 return Cost;
830 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
831 const MachineOperand &MO = MI->getOperand(i);
832 if (!MO.isReg() || MO.isImplicit())
833 continue;
834 Register Reg = MO.getReg();
835 if (!Register::isVirtualRegister(Reg))
836 continue;
837
838 // FIXME: It seems bad to use RegSeen only for some of these calculations.
839 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
840 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
841
842 RegClassWeight W = TRI->getRegClassWeight(RC);
843 int RCCost = 0;
844 if (MO.isDef())
845 RCCost = W.RegWeight;
846 else {
847 bool isKill = isOperandKill(MO, MRI);
848 if (isNew && !isKill && ConsiderUnseenAsDef)
849 // Haven't seen this, it must be a livein.
850 RCCost = W.RegWeight;
851 else if (!isNew && isKill)
852 RCCost = -W.RegWeight;
853 }
854 if (RCCost == 0)
855 continue;
856 const int *PS = TRI->getRegClassPressureSets(RC);
857 for (; *PS != -1; ++PS) {
858 if (Cost.find(*PS) == Cost.end())
859 Cost[*PS] = RCCost;
860 else
861 Cost[*PS] += RCCost;
862 }
863 }
864 return Cost;
865 }
866
867 /// Return true if this machine instruction loads from global offset table or
868 /// constant pool.
mayLoadFromGOTOrConstantPool(MachineInstr & MI)869 static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
870 assert(MI.mayLoad() && "Expected MI that loads!");
871
872 // If we lost memory operands, conservatively assume that the instruction
873 // reads from everything..
874 if (MI.memoperands_empty())
875 return true;
876
877 for (MachineMemOperand *MemOp : MI.memoperands())
878 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
879 if (PSV->isGOT() || PSV->isConstantPool())
880 return true;
881
882 return false;
883 }
884
885 // This function iterates through all the operands of the input store MI and
886 // checks that each register operand statisfies isCallerPreservedPhysReg.
887 // This means, the value being stored and the address where it is being stored
888 // is constant throughout the body of the function (not including prologue and
889 // epilogue). When called with an MI that isn't a store, it returns false.
890 // A future improvement can be to check if the store registers are constant
891 // throughout the loop rather than throughout the funtion.
isInvariantStore(const MachineInstr & MI,const TargetRegisterInfo * TRI,const MachineRegisterInfo * MRI)892 static bool isInvariantStore(const MachineInstr &MI,
893 const TargetRegisterInfo *TRI,
894 const MachineRegisterInfo *MRI) {
895
896 bool FoundCallerPresReg = false;
897 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
898 (MI.getNumOperands() == 0))
899 return false;
900
901 // Check that all register operands are caller-preserved physical registers.
902 for (const MachineOperand &MO : MI.operands()) {
903 if (MO.isReg()) {
904 Register Reg = MO.getReg();
905 // If operand is a virtual register, check if it comes from a copy of a
906 // physical register.
907 if (Register::isVirtualRegister(Reg))
908 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
909 if (Register::isVirtualRegister(Reg))
910 return false;
911 if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
912 return false;
913 else
914 FoundCallerPresReg = true;
915 } else if (!MO.isImm()) {
916 return false;
917 }
918 }
919 return FoundCallerPresReg;
920 }
921
922 // Return true if the input MI is a copy instruction that feeds an invariant
923 // store instruction. This means that the src of the copy has to satisfy
924 // isCallerPreservedPhysReg and atleast one of it's users should satisfy
925 // isInvariantStore.
isCopyFeedingInvariantStore(const MachineInstr & MI,const MachineRegisterInfo * MRI,const TargetRegisterInfo * TRI)926 static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
927 const MachineRegisterInfo *MRI,
928 const TargetRegisterInfo *TRI) {
929
930 // FIXME: If targets would like to look through instructions that aren't
931 // pure copies, this can be updated to a query.
932 if (!MI.isCopy())
933 return false;
934
935 const MachineFunction *MF = MI.getMF();
936 // Check that we are copying a constant physical register.
937 Register CopySrcReg = MI.getOperand(1).getReg();
938 if (Register::isVirtualRegister(CopySrcReg))
939 return false;
940
941 if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
942 return false;
943
944 Register CopyDstReg = MI.getOperand(0).getReg();
945 // Check if any of the uses of the copy are invariant stores.
946 assert(Register::isVirtualRegister(CopyDstReg) &&
947 "copy dst is not a virtual reg");
948
949 for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
950 if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
951 return true;
952 }
953 return false;
954 }
955
956 /// Returns true if the instruction may be a suitable candidate for LICM.
957 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
IsLICMCandidate(MachineInstr & I)958 bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
959 // Check if it's safe to move the instruction.
960 bool DontMoveAcrossStore = true;
961 if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
962 !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
963 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
964 return false;
965 }
966
967 // If it is a load then check if it is guaranteed to execute by making sure
968 // that it dominates all exiting blocks. If it doesn't, then there is a path
969 // out of the loop which does not execute this load, so we can't hoist it.
970 // Loads from constant memory are safe to speculate, for example indexed load
971 // from a jump table.
972 // Stores and side effects are already checked by isSafeToMove.
973 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
974 !IsGuaranteedToExecute(I.getParent())) {
975 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
976 return false;
977 }
978
979 // Convergent attribute has been used on operations that involve inter-thread
980 // communication which results are implicitly affected by the enclosing
981 // control flows. It is not safe to hoist or sink such operations across
982 // control flow.
983 if (I.isConvergent())
984 return false;
985
986 return true;
987 }
988
989 /// Returns true if the instruction is loop invariant.
IsLoopInvariantInst(MachineInstr & I)990 bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
991 if (!IsLICMCandidate(I)) {
992 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
993 return false;
994 }
995 return CurLoop->isLoopInvariant(I);
996 }
997
998 /// Return true if the specified instruction is used by a phi node and hoisting
999 /// it could cause a copy to be inserted.
HasLoopPHIUse(const MachineInstr * MI) const1000 bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
1001 SmallVector<const MachineInstr*, 8> Work(1, MI);
1002 do {
1003 MI = Work.pop_back_val();
1004 for (const MachineOperand &MO : MI->operands()) {
1005 if (!MO.isReg() || !MO.isDef())
1006 continue;
1007 Register Reg = MO.getReg();
1008 if (!Register::isVirtualRegister(Reg))
1009 continue;
1010 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1011 // A PHI may cause a copy to be inserted.
1012 if (UseMI.isPHI()) {
1013 // A PHI inside the loop causes a copy because the live range of Reg is
1014 // extended across the PHI.
1015 if (CurLoop->contains(&UseMI))
1016 return true;
1017 // A PHI in an exit block can cause a copy to be inserted if the PHI
1018 // has multiple predecessors in the loop with different values.
1019 // For now, approximate by rejecting all exit blocks.
1020 if (isExitBlock(UseMI.getParent()))
1021 return true;
1022 continue;
1023 }
1024 // Look past copies as well.
1025 if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1026 Work.push_back(&UseMI);
1027 }
1028 }
1029 } while (!Work.empty());
1030 return false;
1031 }
1032
1033 /// Compute operand latency between a def of 'Reg' and an use in the current
1034 /// loop, return true if the target considered it high.
HasHighOperandLatency(MachineInstr & MI,unsigned DefIdx,Register Reg) const1035 bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1036 Register Reg) const {
1037 if (MRI->use_nodbg_empty(Reg))
1038 return false;
1039
1040 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1041 if (UseMI.isCopyLike())
1042 continue;
1043 if (!CurLoop->contains(UseMI.getParent()))
1044 continue;
1045 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1046 const MachineOperand &MO = UseMI.getOperand(i);
1047 if (!MO.isReg() || !MO.isUse())
1048 continue;
1049 Register MOReg = MO.getReg();
1050 if (MOReg != Reg)
1051 continue;
1052
1053 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1054 return true;
1055 }
1056
1057 // Only look at the first in loop use.
1058 break;
1059 }
1060
1061 return false;
1062 }
1063
1064 /// Return true if the instruction is marked "cheap" or the operand latency
1065 /// between its def and a use is one or less.
IsCheapInstruction(MachineInstr & MI) const1066 bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
1067 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1068 return true;
1069
1070 bool isCheap = false;
1071 unsigned NumDefs = MI.getDesc().getNumDefs();
1072 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1073 MachineOperand &DefMO = MI.getOperand(i);
1074 if (!DefMO.isReg() || !DefMO.isDef())
1075 continue;
1076 --NumDefs;
1077 Register Reg = DefMO.getReg();
1078 if (Register::isPhysicalRegister(Reg))
1079 continue;
1080
1081 if (!TII->hasLowDefLatency(SchedModel, MI, i))
1082 return false;
1083 isCheap = true;
1084 }
1085
1086 return isCheap;
1087 }
1088
1089 /// Visit BBs from header to current BB, check if hoisting an instruction of the
1090 /// given cost matrix can cause high register pressure.
1091 bool
CanCauseHighRegPressure(const DenseMap<unsigned,int> & Cost,bool CheapInstr)1092 MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1093 bool CheapInstr) {
1094 for (const auto &RPIdAndCost : Cost) {
1095 if (RPIdAndCost.second <= 0)
1096 continue;
1097
1098 unsigned Class = RPIdAndCost.first;
1099 int Limit = RegLimit[Class];
1100
1101 // Don't hoist cheap instructions if they would increase register pressure,
1102 // even if we're under the limit.
1103 if (CheapInstr && !HoistCheapInsts)
1104 return true;
1105
1106 for (const auto &RP : BackTrace)
1107 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1108 return true;
1109 }
1110
1111 return false;
1112 }
1113
1114 /// Traverse the back trace from header to the current block and update their
1115 /// register pressures to reflect the effect of hoisting MI from the current
1116 /// block to the preheader.
UpdateBackTraceRegPressure(const MachineInstr * MI)1117 void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1118 // First compute the 'cost' of the instruction, i.e. its contribution
1119 // to register pressure.
1120 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1121 /*ConsiderUnseenAsDef=*/false);
1122
1123 // Update register pressure of blocks from loop header to current block.
1124 for (auto &RP : BackTrace)
1125 for (const auto &RPIdAndCost : Cost)
1126 RP[RPIdAndCost.first] += RPIdAndCost.second;
1127 }
1128
1129 /// Return true if it is potentially profitable to hoist the given loop
1130 /// invariant.
IsProfitableToHoist(MachineInstr & MI)1131 bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
1132 if (MI.isImplicitDef())
1133 return true;
1134
1135 // Besides removing computation from the loop, hoisting an instruction has
1136 // these effects:
1137 //
1138 // - The value defined by the instruction becomes live across the entire
1139 // loop. This increases register pressure in the loop.
1140 //
1141 // - If the value is used by a PHI in the loop, a copy will be required for
1142 // lowering the PHI after extending the live range.
1143 //
1144 // - When hoisting the last use of a value in the loop, that value no longer
1145 // needs to be live in the loop. This lowers register pressure in the loop.
1146
1147 if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
1148 return true;
1149
1150 bool CheapInstr = IsCheapInstruction(MI);
1151 bool CreatesCopy = HasLoopPHIUse(&MI);
1152
1153 // Don't hoist a cheap instruction if it would create a copy in the loop.
1154 if (CheapInstr && CreatesCopy) {
1155 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1156 return false;
1157 }
1158
1159 // Rematerializable instructions should always be hoisted since the register
1160 // allocator can just pull them down again when needed.
1161 if (TII->isTriviallyReMaterializable(MI, AA))
1162 return true;
1163
1164 // FIXME: If there are long latency loop-invariant instructions inside the
1165 // loop at this point, why didn't the optimizer's LICM hoist them?
1166 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1167 const MachineOperand &MO = MI.getOperand(i);
1168 if (!MO.isReg() || MO.isImplicit())
1169 continue;
1170 Register Reg = MO.getReg();
1171 if (!Register::isVirtualRegister(Reg))
1172 continue;
1173 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1174 LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
1175 ++NumHighLatency;
1176 return true;
1177 }
1178 }
1179
1180 // Estimate register pressure to determine whether to LICM the instruction.
1181 // In low register pressure situation, we can be more aggressive about
1182 // hoisting. Also, favors hoisting long latency instructions even in
1183 // moderately high pressure situation.
1184 // Cheap instructions will only be hoisted if they don't increase register
1185 // pressure at all.
1186 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1187 /*ConsiderUnseenAsDef=*/false);
1188
1189 // Visit BBs from header to current BB, if hoisting this doesn't cause
1190 // high register pressure, then it's safe to proceed.
1191 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1192 LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1193 ++NumLowRP;
1194 return true;
1195 }
1196
1197 // Don't risk increasing register pressure if it would create copies.
1198 if (CreatesCopy) {
1199 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1200 return false;
1201 }
1202
1203 // Do not "speculate" in high register pressure situation. If an
1204 // instruction is not guaranteed to be executed in the loop, it's best to be
1205 // conservative.
1206 if (AvoidSpeculation &&
1207 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1208 LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
1209 return false;
1210 }
1211
1212 // High register pressure situation, only hoist if the instruction is going
1213 // to be remat'ed.
1214 if (!TII->isTriviallyReMaterializable(MI, AA) &&
1215 !MI.isDereferenceableInvariantLoad(AA)) {
1216 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1217 return false;
1218 }
1219
1220 return true;
1221 }
1222
1223 /// Unfold a load from the given machineinstr if the load itself could be
1224 /// hoisted. Return the unfolded and hoistable load, or null if the load
1225 /// couldn't be unfolded or if it wouldn't be hoistable.
ExtractHoistableLoad(MachineInstr * MI)1226 MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
1227 // Don't unfold simple loads.
1228 if (MI->canFoldAsLoad())
1229 return nullptr;
1230
1231 // If not, we may be able to unfold a load and hoist that.
1232 // First test whether the instruction is loading from an amenable
1233 // memory location.
1234 if (!MI->isDereferenceableInvariantLoad(AA))
1235 return nullptr;
1236
1237 // Next determine the register class for a temporary register.
1238 unsigned LoadRegIndex;
1239 unsigned NewOpc =
1240 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1241 /*UnfoldLoad=*/true,
1242 /*UnfoldStore=*/false,
1243 &LoadRegIndex);
1244 if (NewOpc == 0) return nullptr;
1245 const MCInstrDesc &MID = TII->get(NewOpc);
1246 MachineFunction &MF = *MI->getMF();
1247 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1248 // Ok, we're unfolding. Create a temporary register and do the unfold.
1249 Register Reg = MRI->createVirtualRegister(RC);
1250
1251 SmallVector<MachineInstr *, 2> NewMIs;
1252 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1253 /*UnfoldLoad=*/true,
1254 /*UnfoldStore=*/false, NewMIs);
1255 (void)Success;
1256 assert(Success &&
1257 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1258 "succeeded!");
1259 assert(NewMIs.size() == 2 &&
1260 "Unfolded a load into multiple instructions!");
1261 MachineBasicBlock *MBB = MI->getParent();
1262 MachineBasicBlock::iterator Pos = MI;
1263 MBB->insert(Pos, NewMIs[0]);
1264 MBB->insert(Pos, NewMIs[1]);
1265 // If unfolding produced a load that wasn't loop-invariant or profitable to
1266 // hoist, discard the new instructions and bail.
1267 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1268 NewMIs[0]->eraseFromParent();
1269 NewMIs[1]->eraseFromParent();
1270 return nullptr;
1271 }
1272
1273 // Update register pressure for the unfolded instruction.
1274 UpdateRegPressure(NewMIs[1]);
1275
1276 // Otherwise we successfully unfolded a load that we can hoist.
1277
1278 // Update the call site info.
1279 if (MI->shouldUpdateCallSiteInfo())
1280 MF.eraseCallSiteInfo(MI);
1281
1282 MI->eraseFromParent();
1283 return NewMIs[0];
1284 }
1285
1286 /// Initialize the CSE map with instructions that are in the current loop
1287 /// preheader that may become duplicates of instructions that are hoisted
1288 /// out of the loop.
InitCSEMap(MachineBasicBlock * BB)1289 void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
1290 for (MachineInstr &MI : *BB)
1291 CSEMap[MI.getOpcode()].push_back(&MI);
1292 }
1293
1294 /// Find an instruction amount PrevMIs that is a duplicate of MI.
1295 /// Return this instruction if it's found.
1296 MachineInstr *
LookForDuplicate(const MachineInstr * MI,std::vector<MachineInstr * > & PrevMIs)1297 MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
1298 std::vector<MachineInstr *> &PrevMIs) {
1299 for (MachineInstr *PrevMI : PrevMIs)
1300 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1301 return PrevMI;
1302
1303 return nullptr;
1304 }
1305
1306 /// Given a LICM'ed instruction, look for an instruction on the preheader that
1307 /// computes the same value. If it's found, do a RAU on with the definition of
1308 /// the existing instruction rather than hoisting the instruction to the
1309 /// preheader.
EliminateCSE(MachineInstr * MI,DenseMap<unsigned,std::vector<MachineInstr * >>::iterator & CI)1310 bool MachineLICMBase::EliminateCSE(
1311 MachineInstr *MI,
1312 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1313 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1314 // the undef property onto uses.
1315 if (CI == CSEMap.end() || MI->isImplicitDef())
1316 return false;
1317
1318 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1319 LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1320
1321 // Replace virtual registers defined by MI by their counterparts defined
1322 // by Dup.
1323 SmallVector<unsigned, 2> Defs;
1324 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1325 const MachineOperand &MO = MI->getOperand(i);
1326
1327 // Physical registers may not differ here.
1328 assert((!MO.isReg() || MO.getReg() == 0 ||
1329 !Register::isPhysicalRegister(MO.getReg()) ||
1330 MO.getReg() == Dup->getOperand(i).getReg()) &&
1331 "Instructions with different phys regs are not identical!");
1332
1333 if (MO.isReg() && MO.isDef() &&
1334 !Register::isPhysicalRegister(MO.getReg()))
1335 Defs.push_back(i);
1336 }
1337
1338 SmallVector<const TargetRegisterClass*, 2> OrigRCs;
1339 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1340 unsigned Idx = Defs[i];
1341 Register Reg = MI->getOperand(Idx).getReg();
1342 Register DupReg = Dup->getOperand(Idx).getReg();
1343 OrigRCs.push_back(MRI->getRegClass(DupReg));
1344
1345 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1346 // Restore old RCs if more than one defs.
1347 for (unsigned j = 0; j != i; ++j)
1348 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1349 return false;
1350 }
1351 }
1352
1353 for (unsigned Idx : Defs) {
1354 Register Reg = MI->getOperand(Idx).getReg();
1355 Register DupReg = Dup->getOperand(Idx).getReg();
1356 MRI->replaceRegWith(Reg, DupReg);
1357 MRI->clearKillFlags(DupReg);
1358 // Clear Dup dead flag if any, we reuse it for Reg.
1359 if (!MRI->use_nodbg_empty(DupReg))
1360 Dup->getOperand(Idx).setIsDead(false);
1361 }
1362
1363 MI->eraseFromParent();
1364 ++NumCSEed;
1365 return true;
1366 }
1367 return false;
1368 }
1369
1370 /// Return true if the given instruction will be CSE'd if it's hoisted out of
1371 /// the loop.
MayCSE(MachineInstr * MI)1372 bool MachineLICMBase::MayCSE(MachineInstr *MI) {
1373 unsigned Opcode = MI->getOpcode();
1374 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1375 CSEMap.find(Opcode);
1376 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1377 // the undef property onto uses.
1378 if (CI == CSEMap.end() || MI->isImplicitDef())
1379 return false;
1380
1381 return LookForDuplicate(MI, CI->second) != nullptr;
1382 }
1383
1384 /// When an instruction is found to use only loop invariant operands
1385 /// that are safe to hoist, this instruction is called to do the dirty work.
1386 /// It returns true if the instruction is hoisted.
Hoist(MachineInstr * MI,MachineBasicBlock * Preheader)1387 bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1388 MachineBasicBlock *SrcBlock = MI->getParent();
1389
1390 // Disable the instruction hoisting due to block hotness
1391 if ((DisableHoistingToHotterBlocks == UseBFI::All ||
1392 (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
1393 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1394 ++NumNotHoistedDueToHotness;
1395 return false;
1396 }
1397 // First check whether we should hoist this instruction.
1398 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1399 // If not, try unfolding a hoistable load.
1400 MI = ExtractHoistableLoad(MI);
1401 if (!MI) return false;
1402 }
1403
1404 // If we have hoisted an instruction that may store, it can only be a constant
1405 // store.
1406 if (MI->mayStore())
1407 NumStoreConst++;
1408
1409 // Now move the instructions to the predecessor, inserting it before any
1410 // terminator instructions.
1411 LLVM_DEBUG({
1412 dbgs() << "Hoisting " << *MI;
1413 if (MI->getParent()->getBasicBlock())
1414 dbgs() << " from " << printMBBReference(*MI->getParent());
1415 if (Preheader->getBasicBlock())
1416 dbgs() << " to " << printMBBReference(*Preheader);
1417 dbgs() << "\n";
1418 });
1419
1420 // If this is the first instruction being hoisted to the preheader,
1421 // initialize the CSE map with potential common expressions.
1422 if (FirstInLoop) {
1423 InitCSEMap(Preheader);
1424 FirstInLoop = false;
1425 }
1426
1427 // Look for opportunity to CSE the hoisted instruction.
1428 unsigned Opcode = MI->getOpcode();
1429 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1430 CSEMap.find(Opcode);
1431 if (!EliminateCSE(MI, CI)) {
1432 // Otherwise, splice the instruction to the preheader.
1433 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1434
1435 // Since we are moving the instruction out of its basic block, we do not
1436 // retain its debug location. Doing so would degrade the debugging
1437 // experience and adversely affect the accuracy of profiling information.
1438 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
1439 MI->setDebugLoc(DebugLoc());
1440
1441 // Update register pressure for BBs from header to this block.
1442 UpdateBackTraceRegPressure(MI);
1443
1444 // Clear the kill flags of any register this instruction defines,
1445 // since they may need to be live throughout the entire loop
1446 // rather than just live for part of it.
1447 for (MachineOperand &MO : MI->operands())
1448 if (MO.isReg() && MO.isDef() && !MO.isDead())
1449 MRI->clearKillFlags(MO.getReg());
1450
1451 // Add to the CSE map.
1452 if (CI != CSEMap.end())
1453 CI->second.push_back(MI);
1454 else
1455 CSEMap[Opcode].push_back(MI);
1456 }
1457
1458 ++NumHoisted;
1459 Changed = true;
1460
1461 return true;
1462 }
1463
1464 /// Get the preheader for the current loop, splitting a critical edge if needed.
getCurPreheader()1465 MachineBasicBlock *MachineLICMBase::getCurPreheader() {
1466 // Determine the block to which to hoist instructions. If we can't find a
1467 // suitable loop predecessor, we can't do any hoisting.
1468
1469 // If we've tried to get a preheader and failed, don't try again.
1470 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1471 return nullptr;
1472
1473 if (!CurPreheader) {
1474 CurPreheader = CurLoop->getLoopPreheader();
1475 if (!CurPreheader) {
1476 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1477 if (!Pred) {
1478 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1479 return nullptr;
1480 }
1481
1482 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1483 if (!CurPreheader) {
1484 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1485 return nullptr;
1486 }
1487 }
1488 }
1489 return CurPreheader;
1490 }
1491
1492 /// Is the target basic block at least "BlockFrequencyRatioThreshold"
1493 /// times hotter than the source basic block.
isTgtHotterThanSrc(MachineBasicBlock * SrcBlock,MachineBasicBlock * TgtBlock)1494 bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1495 MachineBasicBlock *TgtBlock) {
1496 // Parse source and target basic block frequency from MBFI
1497 uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1498 uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1499
1500 // Disable the hoisting if source block frequency is zero
1501 if (!SrcBF)
1502 return true;
1503
1504 double Ratio = (double)DstBF / SrcBF;
1505
1506 // Compare the block frequency ratio with the threshold
1507 return Ratio > BlockFrequencyRatioThreshold;
1508 }
1509