1 //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists expressions from branches to a common dominator. It uses
10 // GVN (global value numbering) to discover expressions computing the same
11 // values. The primary goals of code-hoisting are:
12 // 1. To reduce the code size.
13 // 2. In some cases reduce critical path (by exposing more ILP).
14 //
15 // The algorithm factors out the reachability of values such that multiple
16 // queries to find reachability of values are fast. This is based on finding the
17 // ANTIC points in the CFG which do not change during hoisting. The ANTIC points
18 // are basically the dominance-frontiers in the inverse graph. So we introduce a
19 // data structure (CHI nodes) to keep track of values flowing out of a basic
20 // block. We only do this for values with multiple occurrences in the function
21 // as they are the potential hoistable candidates. This approach allows us to
22 // hoist instructions to a basic block with more than two successors, as well as
23 // deal with infinite loops in a trivial way.
24 //
25 // Limitations: This pass does not hoist fully redundant expressions because
26 // they are already handled by GVN-PRE. It is advisable to run gvn-hoist before
27 // and after gvn-pre because gvn-pre creates opportunities for more instructions
28 // to be hoisted.
29 //
30 // Hoisting may affect the performance in some cases. To mitigate that, hoisting
31 // is disabled in the following cases.
32 // 1. Scalars across calls.
33 // 2. geps when corresponding load/store cannot be hoisted.
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/DenseMap.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
42 #include "llvm/ADT/iterator_range.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/GlobalsModRef.h"
45 #include "llvm/Analysis/IteratedDominanceFrontier.h"
46 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
47 #include "llvm/Analysis/MemorySSA.h"
48 #include "llvm/Analysis/MemorySSAUpdater.h"
49 #include "llvm/Analysis/PostDominators.h"
50 #include "llvm/Analysis/ValueTracking.h"
51 #include "llvm/IR/Argument.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Scalar.h"
74 #include "llvm/Transforms/Scalar/GVN.h"
75 #include "llvm/Transforms/Utils/Local.h"
76 #include <algorithm>
77 #include <cassert>
78 #include <iterator>
79 #include <memory>
80 #include <utility>
81 #include <vector>
82 
83 using namespace llvm;
84 
85 #define DEBUG_TYPE "gvn-hoist"
86 
// Pass statistics: for each category of instruction, one counter for
// instructions hoisted and one for instructions removed.
STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumRemoved, "Number of instructions removed");
STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
STATISTIC(NumLoadsRemoved, "Number of loads removed");
STATISTIC(NumStoresHoisted, "Number of stores hoisted");
STATISTIC(NumStoresRemoved, "Number of stores removed");
STATISTIC(NumCallsHoisted, "Number of calls hoisted");
STATISTIC(NumCallsRemoved, "Number of calls removed");

// Hidden command-line limits. For each of them -1 means "unlimited".

// Upper bound on the number of instructions to hoist.
static cl::opt<int>
    MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
                        cl::desc("Max number of instructions to hoist "
                                 "(default unlimited = -1)"));

// Upper bound on the number of basic blocks between hoisting locations; used
// as the initial per-query block budget in GVNHoist::checkSafety.
static cl::opt<int> MaxNumberOfBBSInPath(
    "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
    cl::desc("Max number of basic blocks on the path between "
             "hoisting locations (default = 4, unlimited = -1)"));

// Upper bound on how deep into a basic block candidates are looked for.
static cl::opt<int> MaxDepthInBB(
    "gvn-hoist-max-depth", cl::Hidden, cl::init(100),
    cl::desc("Hoist instructions from the beginning of the BB up to the "
             "maximum specified depth (default = 100, unlimited = -1)"));

// Upper bound on dependent-chain length; GVNHoist::run compares its round
// counter against this value.
static cl::opt<int>
    MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
                   cl::desc("Maximum length of dependent chains to hoist "
                            "(default = 10, unlimited = -1)"));
115 
116 namespace llvm {
117 
// Maps a basic block to a boolean flag (GVNHoist uses it to memoize hasEH).
using BBSideEffectsSet = DenseMap<const BasicBlock *, bool>;
// Small inline-storage list of instructions, and its size-erased base type.
using SmallVecInsn = SmallVector<Instruction *, 4>;
using SmallVecImplInsn = SmallVectorImpl<Instruction *>;

// Each element of a hoisting list contains the basic block where to hoist and
// a list of instructions to be hoisted.
using HoistingPointInfo = std::pair<BasicBlock *, SmallVecInsn>;

using HoistingPointList = SmallVector<HoistingPointInfo, 4>;

// A pair of value numbers used as a lookup key. When only one value number is
// needed, the second component is set to InvalidVN.
using VNType = std::pair<unsigned, unsigned>;

// A map from a pair of VNs to all the instructions with those VNs.
using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>;
132 
133 // CHI keeps information about values flowing out of a basic block.  It is
134 // similar to PHI but in the inverse graph, and used for outgoing values on each
135 // edge. For conciseness, it is computed only for instructions with multiple
136 // occurrences in the CFG because they are the only hoistable candidates.
137 //     A (CHI[{V, B, I1}, {V, C, I2}]
138 //  /     \
139 // /       \
140 // B(I1)  C (I2)
141 // The Value number for both I1 and I2 is V, the CHI node will save the
142 // instruction as well as the edge where the value is flowing to.
143 struct CHIArg {
144   VNType VN;
145 
146   // Edge destination (shows the direction of flow), may not be where the I is.
147   BasicBlock *Dest;
148 
149   // The instruction (VN) which uses the values flowing out of CHI.
150   Instruction *I;
151 
operator ==llvm::CHIArg152   bool operator==(const CHIArg &A) { return VN == A.VN; }
operator !=llvm::CHIArg153   bool operator!=(const CHIArg &A) { return !(*this == A); }
154 };
155 
// Iterator over a vector of CHI arguments, and a [begin, end) range of them.
using CHIIt = SmallVectorImpl<CHIArg>::iterator;
using CHIArgs = iterator_range<CHIIt>;
// Per basic block: the CHI arguments attached to that block.
using OutValuesType = DenseMap<BasicBlock *, SmallVector<CHIArg, 2>>;
// Per basic block: (value number, instruction) pairs associated with it.
using InValuesType =
    DenseMap<BasicBlock *, SmallVector<std::pair<VNType, Instruction *>, 2>>;

// An invalid value number, used as the second component of the key when
// inserting a single value number into VNtoInsns.
enum : unsigned { InvalidVN = ~2U };
165 
166 // Records all scalar instructions candidate for code hoisting.
167 class InsnInfo {
168   VNtoInsns VNtoScalars;
169 
170 public:
171   // Inserts I and its value number in VNtoScalars.
insert(Instruction * I,GVN::ValueTable & VN)172   void insert(Instruction *I, GVN::ValueTable &VN) {
173     // Scalar instruction.
174     unsigned V = VN.lookupOrAdd(I);
175     VNtoScalars[{V, InvalidVN}].push_back(I);
176   }
177 
getVNTable() const178   const VNtoInsns &getVNTable() const { return VNtoScalars; }
179 };
180 
181 // Records all load instructions candidate for code hoisting.
182 class LoadInfo {
183   VNtoInsns VNtoLoads;
184 
185 public:
186   // Insert Load and the value number of its memory address in VNtoLoads.
insert(LoadInst * Load,GVN::ValueTable & VN)187   void insert(LoadInst *Load, GVN::ValueTable &VN) {
188     if (Load->isSimple()) {
189       unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
190       VNtoLoads[{V, InvalidVN}].push_back(Load);
191     }
192   }
193 
getVNTable() const194   const VNtoInsns &getVNTable() const { return VNtoLoads; }
195 };
196 
197 // Records all store instructions candidate for code hoisting.
198 class StoreInfo {
199   VNtoInsns VNtoStores;
200 
201 public:
202   // Insert the Store and a hash number of the store address and the stored
203   // value in VNtoStores.
insert(StoreInst * Store,GVN::ValueTable & VN)204   void insert(StoreInst *Store, GVN::ValueTable &VN) {
205     if (!Store->isSimple())
206       return;
207     // Hash the store address and the stored value.
208     Value *Ptr = Store->getPointerOperand();
209     Value *Val = Store->getValueOperand();
210     VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store);
211   }
212 
getVNTable() const213   const VNtoInsns &getVNTable() const { return VNtoStores; }
214 };
215 
216 // Records all call instructions candidate for code hoisting.
217 class CallInfo {
218   VNtoInsns VNtoCallsScalars;
219   VNtoInsns VNtoCallsLoads;
220   VNtoInsns VNtoCallsStores;
221 
222 public:
223   // Insert Call and its value numbering in one of the VNtoCalls* containers.
insert(CallInst * Call,GVN::ValueTable & VN)224   void insert(CallInst *Call, GVN::ValueTable &VN) {
225     // A call that doesNotAccessMemory is handled as a Scalar,
226     // onlyReadsMemory will be handled as a Load instruction,
227     // all other calls will be handled as stores.
228     unsigned V = VN.lookupOrAdd(Call);
229     auto Entry = std::make_pair(V, InvalidVN);
230 
231     if (Call->doesNotAccessMemory())
232       VNtoCallsScalars[Entry].push_back(Call);
233     else if (Call->onlyReadsMemory())
234       VNtoCallsLoads[Entry].push_back(Call);
235     else
236       VNtoCallsStores[Entry].push_back(Call);
237   }
238 
getScalarVNTable() const239   const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
getLoadVNTable() const240   const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
getStoreVNTable() const241   const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
242 };
243 
combineKnownMetadata(Instruction * ReplInst,Instruction * I)244 static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
245   static const unsigned KnownIDs[] = {
246       LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
247       LLVMContext::MD_noalias,        LLVMContext::MD_range,
248       LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
249       LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
250   combineMetadata(ReplInst, I, KnownIDs, true);
251 }
252 
253 // This pass hoists common computations across branches sharing common
254 // dominator. The primary goal is to reduce the code size, and in some
255 // cases reduce critical path (by exposing more ILP).
256 class GVNHoist {
257 public:
  // Stores the analysis pointers handed in by the caller and creates a
  // MemorySSAUpdater over MSSA.
  GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
           MemoryDependenceResults *MD, MemorySSA *MSSA)
      : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
262 
run(Function & F)263   bool run(Function &F) {
264     NumFuncArgs = F.arg_size();
265     VN.setDomTree(DT);
266     VN.setAliasAnalysis(AA);
267     VN.setMemDep(MD);
268     bool Res = false;
269     // Perform DFS Numbering of instructions.
270     unsigned BBI = 0;
271     for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
272       DFSNumber[BB] = ++BBI;
273       unsigned I = 0;
274       for (auto &Inst : *BB)
275         DFSNumber[&Inst] = ++I;
276     }
277 
278     int ChainLength = 0;
279 
280     // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
281     while (true) {
282       if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
283         return Res;
284 
285       auto HoistStat = hoistExpressions(F);
286       if (HoistStat.first + HoistStat.second == 0)
287         return Res;
288 
289       if (HoistStat.second > 0)
290         // To address a limitation of the current GVN, we need to rerun the
291         // hoisting after we hoisted loads or stores in order to be able to
292         // hoist all scalars dependent on the hoisted ld/st.
293         VN.clear();
294 
295       Res = true;
296     }
297 
298     return Res;
299   }
300 
301   // Copied from NewGVN.cpp
302   // This function provides global ranking of operations so that we can place
303   // them in a canonical order.  Note that rank alone is not necessarily enough
304   // for a complete ordering, as constants all have the same rank.  However,
305   // generally, we will simplify an operation with all constants so that it
306   // doesn't matter what order they appear in.
rank(const Value * V) const307   unsigned int rank(const Value *V) const {
308     // Prefer constants to undef to anything else
309     // Undef is a constant, have to check it first.
310     // Prefer smaller constants to constantexprs
311     if (isa<ConstantExpr>(V))
312       return 2;
313     if (isa<UndefValue>(V))
314       return 1;
315     if (isa<Constant>(V))
316       return 0;
317     else if (auto *A = dyn_cast<Argument>(V))
318       return 3 + A->getArgNo();
319 
320     // Need to shift the instruction DFS by number of arguments + 3 to account
321     // for the constant and argument ranking above.
322     auto Result = DFSNumber.lookup(V);
323     if (Result > 0)
324       return 4 + NumFuncArgs + Result;
325     // Unreachable or something else, just return a really large number.
326     return ~0;
327   }
328 
private:
  // Value numbering table; run() points it at DT, AA and MD, and clears it
  // after a round that hoisted loads or stores.
  GVN::ValueTable VN;
  // Analyses supplied through the constructor.
  DominatorTree *DT;
  PostDominatorTree *PDT;
  AliasAnalysis *AA;
  MemoryDependenceResults *MD;
  MemorySSA *MSSA;
  // Updater created over MSSA in the constructor.
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
  // Filled by run(): blocks are numbered from 1 in depth-first order, and
  // instructions from 1 by position inside their block. A lookup of 0 means
  // "not numbered".
  DenseMap<const Value *, unsigned> DFSNumber;
  // Memoized results of hasEH(), keyed by basic block.
  BBSideEffectsSet BBSideEffects;
  // Blocks that hasEHhelper() treats as blocking, unless the block is the
  // source block of the query.
  DenseSet<const BasicBlock *> HoistBarrier;
  // NOTE(review): computeInsertionPoints declares a local with this same
  // name; confirm whether this member is still needed.
  SmallVector<BasicBlock *, 32> IDFBlocks;
  // Argument count of the function being processed; set by run(), read by
  // rank().
  unsigned NumFuncArgs;
  // Constant switch, fixed to false.
  const bool HoistingGeps = false;

  // Category of the instructions being considered for hoisting.
  enum InsKind { Unknown, Scalar, Load, Store };
345 
346   // Return true when there are exception handling in BB.
hasEH(const BasicBlock * BB)347   bool hasEH(const BasicBlock *BB) {
348     auto It = BBSideEffects.find(BB);
349     if (It != BBSideEffects.end())
350       return It->second;
351 
352     if (BB->isEHPad() || BB->hasAddressTaken()) {
353       BBSideEffects[BB] = true;
354       return true;
355     }
356 
357     if (BB->getTerminator()->mayThrow()) {
358       BBSideEffects[BB] = true;
359       return true;
360     }
361 
362     BBSideEffects[BB] = false;
363     return false;
364   }
365 
366   // Return true when a successor of BB dominates A.
successorDominate(const BasicBlock * BB,const BasicBlock * A)367   bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
368     for (const BasicBlock *Succ : successors(BB))
369       if (DT->dominates(Succ, A))
370         return true;
371 
372     return false;
373   }
374 
375   // Return true when I1 appears before I2 in the instructions of BB.
firstInBB(const Instruction * I1,const Instruction * I2)376   bool firstInBB(const Instruction *I1, const Instruction *I2) {
377     assert(I1->getParent() == I2->getParent());
378     unsigned I1DFS = DFSNumber.lookup(I1);
379     unsigned I2DFS = DFSNumber.lookup(I2);
380     assert(I1DFS && I2DFS);
381     return I1DFS < I2DFS;
382   }
383 
384   // Return true when there are memory uses of Def in BB.
hasMemoryUse(const Instruction * NewPt,MemoryDef * Def,const BasicBlock * BB)385   bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
386                     const BasicBlock *BB) {
387     const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
388     if (!Acc)
389       return false;
390 
391     Instruction *OldPt = Def->getMemoryInst();
392     const BasicBlock *OldBB = OldPt->getParent();
393     const BasicBlock *NewBB = NewPt->getParent();
394     bool ReachedNewPt = false;
395 
396     for (const MemoryAccess &MA : *Acc)
397       if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
398         Instruction *Insn = MU->getMemoryInst();
399 
400         // Do not check whether MU aliases Def when MU occurs after OldPt.
401         if (BB == OldBB && firstInBB(OldPt, Insn))
402           break;
403 
404         // Do not check whether MU aliases Def when MU occurs before NewPt.
405         if (BB == NewBB) {
406           if (!ReachedNewPt) {
407             if (firstInBB(Insn, NewPt))
408               continue;
409             ReachedNewPt = true;
410           }
411         }
412         if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
413           return true;
414       }
415 
416     return false;
417   }
418 
hasEHhelper(const BasicBlock * BB,const BasicBlock * SrcBB,int & NBBsOnAllPaths)419   bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
420                    int &NBBsOnAllPaths) {
421     // Stop walk once the limit is reached.
422     if (NBBsOnAllPaths == 0)
423       return true;
424 
425     // Impossible to hoist with exceptions on the path.
426     if (hasEH(BB))
427       return true;
428 
429     // No such instruction after HoistBarrier in a basic block was
430     // selected for hoisting so instructions selected within basic block with
431     // a hoist barrier can be hoisted.
432     if ((BB != SrcBB) && HoistBarrier.count(BB))
433       return true;
434 
435     return false;
436   }
437 
438   // Return true when there are exception handling or loads of memory Def
439   // between Def and NewPt.  This function is only called for stores: Def is
440   // the MemoryDef of the store to be hoisted.
441 
  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
  // initialized to -1 which is unlimited.
hasEHOrLoadsOnPath(const Instruction * NewPt,MemoryDef * Def,int & NBBsOnAllPaths)445   bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
446                           int &NBBsOnAllPaths) {
447     const BasicBlock *NewBB = NewPt->getParent();
448     const BasicBlock *OldBB = Def->getBlock();
449     assert(DT->dominates(NewBB, OldBB) && "invalid path");
450     assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
451            "def does not dominate new hoisting point");
452 
453     // Walk all basic blocks reachable in depth-first iteration on the inverse
454     // CFG from OldBB to NewBB. These blocks are all the blocks that may be
455     // executed between the execution of NewBB and OldBB. Hoisting an expression
456     // from OldBB into NewBB has to be safe on all execution paths.
457     for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
458       const BasicBlock *BB = *I;
459       if (BB == NewBB) {
460         // Stop traversal when reaching HoistPt.
461         I.skipChildren();
462         continue;
463       }
464 
465       if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
466         return true;
467 
468       // Check that we do not move a store past loads.
469       if (hasMemoryUse(NewPt, Def, BB))
470         return true;
471 
472       // -1 is unlimited number of blocks on all paths.
473       if (NBBsOnAllPaths != -1)
474         --NBBsOnAllPaths;
475 
476       ++I;
477     }
478 
479     return false;
480   }
481 
482   // Return true when there are exception handling between HoistPt and BB.
483   // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
484   // return true when the counter NBBsOnAllPaths reaches 0, except when it is
485   // initialized to -1 which is unlimited.
hasEHOnPath(const BasicBlock * HoistPt,const BasicBlock * SrcBB,int & NBBsOnAllPaths)486   bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
487                    int &NBBsOnAllPaths) {
488     assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
489 
490     // Walk all basic blocks reachable in depth-first iteration on
491     // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
492     // blocks that may be executed between the execution of NewHoistPt and
493     // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
494     // on all execution paths.
495     for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
496       const BasicBlock *BB = *I;
497       if (BB == HoistPt) {
498         // Stop traversal when reaching NewHoistPt.
499         I.skipChildren();
500         continue;
501       }
502 
503       if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
504         return true;
505 
506       // -1 is unlimited number of blocks on all paths.
507       if (NBBsOnAllPaths != -1)
508         --NBBsOnAllPaths;
509 
510       ++I;
511     }
512 
513     return false;
514   }
515 
516   // Return true when it is safe to hoist a memory load or store U from OldPt
517   // to NewPt.
safeToHoistLdSt(const Instruction * NewPt,const Instruction * OldPt,MemoryUseOrDef * U,InsKind K,int & NBBsOnAllPaths)518   bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
519                        MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
520     // In place hoisting is safe.
521     if (NewPt == OldPt)
522       return true;
523 
524     const BasicBlock *NewBB = NewPt->getParent();
525     const BasicBlock *OldBB = OldPt->getParent();
526     const BasicBlock *UBB = U->getBlock();
527 
528     // Check for dependences on the Memory SSA.
529     MemoryAccess *D = U->getDefiningAccess();
530     BasicBlock *DBB = D->getBlock();
531     if (DT->properlyDominates(NewBB, DBB))
532       // Cannot move the load or store to NewBB above its definition in DBB.
533       return false;
534 
535     if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
536       if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
537         if (!firstInBB(UD->getMemoryInst(), NewPt))
538           // Cannot move the load or store to NewPt above its definition in D.
539           return false;
540 
541     // Check for unsafe hoistings due to side effects.
542     if (K == InsKind::Store) {
543       if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
544         return false;
545     } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
546       return false;
547 
548     if (UBB == NewBB) {
549       if (DT->properlyDominates(DBB, NewBB))
550         return true;
551       assert(UBB == DBB);
552       assert(MSSA->locallyDominates(D, U));
553     }
554 
555     // No side effects: it is safe to hoist.
556     return true;
557   }
558 
  // Return true when it is safe to hoist scalar instructions from BB to
  // HoistBB.
safeToHoistScalar(const BasicBlock * HoistBB,const BasicBlock * BB,int & NBBsOnAllPaths)561   bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB,
562                          int &NBBsOnAllPaths) {
563     return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths);
564   }
565 
566   // In the inverse CFG, the dominance frontier of basic block (BB) is the
567   // point where ANTIC needs to be computed for instructions which are going
568   // to be hoisted. Since this point does not change during gvn-hoist,
569   // we compute it only once (on demand).
  // The idea is inspired by:
571   // "Partial Redundancy Elimination in SSA Form"
572   // ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW
573   // They use similar idea in the forward graph to find fully redundant and
574   // partially redundant expressions, here it is used in the inverse graph to
575   // find fully anticipable instructions at merge point (post-dominator in
576   // the inverse CFG).
577   // Returns the edge via which an instruction in BB will get the values from.
578 
579   // Returns true when the values are flowing out to each edge.
valueAnticipable(CHIArgs C,Instruction * TI) const580   bool valueAnticipable(CHIArgs C, Instruction *TI) const {
581     if (TI->getNumSuccessors() > (unsigned)size(C))
582       return false; // Not enough args in this CHI.
583 
584     for (auto CHI : C) {
585       BasicBlock *Dest = CHI.Dest;
586       // Find if all the edges have values flowing out of BB.
587       bool Found = llvm::any_of(
588           successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
589       if (!Found)
590         return false;
591     }
592     return true;
593   }
594 
  // Check if it is safe to hoist the values tracked by the CHI args in C to
  // BB and accumulate the safe ones in Safe.
checkSafety(CHIArgs C,BasicBlock * BB,InsKind K,SmallVectorImpl<CHIArg> & Safe)597   void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
598                    SmallVectorImpl<CHIArg> &Safe) {
599     int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
600     for (auto CHI : C) {
601       Instruction *Insn = CHI.I;
602       if (!Insn) // No instruction was inserted in this CHI.
603         continue;
604       if (K == InsKind::Scalar) {
605         if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
606           Safe.push_back(CHI);
607       } else {
608         MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn);
609         if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths))
610           Safe.push_back(CHI);
611       }
612     }
613   }
614 
  // One stack of instructions per value number; fillRenameStack pushes onto
  // these stacks and fillChiArgs pops from them.
  using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>;
616 
617   // Push all the VNs corresponding to BB into RenameStack.
fillRenameStack(BasicBlock * BB,InValuesType & ValueBBs,RenameStackType & RenameStack)618   void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
619                        RenameStackType &RenameStack) {
620     auto it1 = ValueBBs.find(BB);
621     if (it1 != ValueBBs.end()) {
622       // Iterate in reverse order to keep lower ranked values on the top.
623       for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
624         // Get the value of instruction I
625         LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
626         RenameStack[VI.first].push_back(VI.second);
627       }
628     }
629   }
630 
fillChiArgs(BasicBlock * BB,OutValuesType & CHIBBs,RenameStackType & RenameStack)631   void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
632                    RenameStackType &RenameStack) {
633     // For each *predecessor* (because Post-DOM) of BB check if it has a CHI
634     for (auto Pred : predecessors(BB)) {
635       auto P = CHIBBs.find(Pred);
636       if (P == CHIBBs.end()) {
637         continue;
638       }
639       LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
640       // A CHI is found (BB -> Pred is an edge in the CFG)
641       // Pop the stack until Top(V) = Ve.
642       auto &VCHI = P->second;
643       for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
644         CHIArg &C = *It;
645         if (!C.Dest) {
646           auto si = RenameStack.find(C.VN);
647           // The Basic Block where CHI is must dominate the value we want to
648           // track in a CHI. In the PDom walk, there can be values in the
649           // stack which are not control dependent e.g., nested loop.
650           if (si != RenameStack.end() && si->second.size() &&
651               DT->properlyDominates(Pred, si->second.back()->getParent())) {
652             C.Dest = BB;                     // Assign the edge
653             C.I = si->second.pop_back_val(); // Assign the argument
654             LLVM_DEBUG(dbgs()
655                        << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
656                        << ", VN: " << C.VN.first << ", " << C.VN.second);
657           }
658           // Move to next CHI of a different value
659           It = std::find_if(It, VCHI.end(),
660                             [It](CHIArg &A) { return A != *It; });
661         } else
662           ++It;
663       }
664     }
665   }
666 
667   // Walk the post-dominator tree top-down and use a stack for each value to
668   // store the last value you see. When you hit a CHI from a given edge, the
669   // value to use as the argument is at the top of the stack, add the value to
670   // CHI and pop.
insertCHI(InValuesType & ValueBBs,OutValuesType & CHIBBs)671   void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) {
672     auto Root = PDT->getNode(nullptr);
673     if (!Root)
674       return;
675     // Depth first walk on PDom tree to fill the CHIargs at each PDF.
676     RenameStackType RenameStack;
677     for (auto Node : depth_first(Root)) {
678       BasicBlock *BB = Node->getBlock();
679       if (!BB)
680         continue;
681 
682       // Collect all values in BB and push to stack.
683       fillRenameStack(BB, ValueBBs, RenameStack);
684 
685       // Fill outgoing values in each CHI corresponding to BB.
686       fillChiArgs(BB, CHIBBs, RenameStack);
687     }
688   }
689 
690   // Walk all the CHI-nodes to find ones which have a empty-entry and remove
691   // them Then collect all the instructions which are safe to hoist and see if
692   // they form a list of anticipable values. OutValues contains CHIs
693   // corresponding to each basic block.
findHoistableCandidates(OutValuesType & CHIBBs,InsKind K,HoistingPointList & HPL)694   void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
695                                HoistingPointList &HPL) {
696     auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
697 
698     // CHIArgs now have the outgoing values, so check for anticipability and
699     // accumulate hoistable candidates in HPL.
700     for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
701       BasicBlock *BB = A.first;
702       SmallVectorImpl<CHIArg> &CHIs = A.second;
703       // Vector of PHIs contains PHIs for different instructions.
704       // Sort the args according to their VNs, such that identical
705       // instructions are together.
706       llvm::stable_sort(CHIs, cmpVN);
707       auto TI = BB->getTerminator();
708       auto B = CHIs.begin();
709       // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
710       auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(),
711                                  [B](CHIArg &A) { return A != *B; });
712       auto PrevIt = CHIs.begin();
713       while (PrevIt != PHIIt) {
714         // Collect values which satisfy safety checks.
715         SmallVector<CHIArg, 2> Safe;
716         // We check for safety first because there might be multiple values in
717         // the same path, some of which are not safe to be hoisted, but overall
718         // each edge has at least one value which can be hoisted, making the
719         // value anticipable along that path.
720         checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
721 
722         // List of safe values should be anticipable at TI.
723         if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
724           HPL.push_back({BB, SmallVecInsn()});
725           SmallVecInsn &V = HPL.back().second;
726           for (auto B : Safe)
727             V.push_back(B.I);
728         }
729 
730         // Check other VNs
731         PrevIt = PHIIt;
732         PHIIt = std::find_if(PrevIt, CHIs.end(),
733                              [PrevIt](CHIArg &A) { return A != *PrevIt; });
734       }
735     }
736   }
737 
738   // Compute insertion points for each values which can be fully anticipated at
739   // a dominator. HPL contains all such values.
computeInsertionPoints(const VNtoInsns & Map,HoistingPointList & HPL,InsKind K)740   void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
741                               InsKind K) {
742     // Sort VNs based on their rankings
743     std::vector<VNType> Ranks;
744     for (const auto &Entry : Map) {
745       Ranks.push_back(Entry.first);
746     }
747 
748     // TODO: Remove fully-redundant expressions.
749     // Get instruction from the Map, assume that all the Instructions
750     // with same VNs have same rank (this is an approximation).
751     llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
752       return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
753     });
754 
755     // - Sort VNs according to their rank, and start with lowest ranked VN
756     // - Take a VN and for each instruction with same VN
757     //   - Find the dominance frontier in the inverse graph (PDF)
758     //   - Insert the chi-node at PDF
759     // - Remove the chi-nodes with missing entries
760     // - Remove values from CHI-nodes which do not truly flow out, e.g.,
761     //   modified along the path.
762     // - Collect the remaining values that are still anticipable
763     SmallVector<BasicBlock *, 2> IDFBlocks;
764     ReverseIDFCalculator IDFs(*PDT);
765     OutValuesType OutValue;
766     InValuesType InValue;
767     for (const auto &R : Ranks) {
768       const SmallVecInsn &V = Map.lookup(R);
769       if (V.size() < 2)
770         continue;
771       const VNType &VN = R;
772       SmallPtrSet<BasicBlock *, 2> VNBlocks;
773       for (auto &I : V) {
774         BasicBlock *BBI = I->getParent();
775         if (!hasEH(BBI))
776           VNBlocks.insert(BBI);
777       }
778       // Compute the Post Dominance Frontiers of each basic block
779       // The dominance frontier of a live block X in the reverse
780       // control graph is the set of blocks upon which X is control
781       // dependent. The following sequence computes the set of blocks
782       // which currently have dead terminators that are control
783       // dependence sources of a block which is in NewLiveBlocks.
784       IDFs.setDefiningBlocks(VNBlocks);
785       IDFBlocks.clear();
786       IDFs.calculate(IDFBlocks);
787 
788       // Make a map of BB vs instructions to be hoisted.
789       for (unsigned i = 0; i < V.size(); ++i) {
790         InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i]));
791       }
792       // Insert empty CHI node for this VN. This is used to factor out
793       // basic blocks where the ANTIC can potentially change.
794       for (auto IDFB : IDFBlocks) {
795         for (unsigned i = 0; i < V.size(); ++i) {
796           CHIArg C = {VN, nullptr, nullptr};
797            // Ignore spurious PDFs.
798           if (DT->properlyDominates(IDFB, V[i]->getParent())) {
799             OutValue[IDFB].push_back(C);
800             LLVM_DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName()
801                               << ", for Insn: " << *V[i]);
802           }
803         }
804       }
805     }
806 
807     // Insert CHI args at each PDF to iterate on factored graph of
808     // control dependence.
809     insertCHI(InValue, OutValue);
810     // Using the CHI args inserted at each PDF, find fully anticipable values.
811     findHoistableCandidates(OutValue, K, HPL);
812   }
813 
814   // Return true when all operands of Instr are available at insertion point
815   // HoistPt. When limiting the number of hoisted expressions, one could hoist
816   // a load without hoisting its access function. So before hoisting any
817   // expression, make sure that all its operands are available at insert point.
allOperandsAvailable(const Instruction * I,const BasicBlock * HoistPt) const818   bool allOperandsAvailable(const Instruction *I,
819                             const BasicBlock *HoistPt) const {
820     for (const Use &Op : I->operands())
821       if (const auto *Inst = dyn_cast<Instruction>(&Op))
822         if (!DT->dominates(Inst->getParent(), HoistPt))
823           return false;
824 
825     return true;
826   }
827 
828   // Same as allOperandsAvailable with recursive check for GEP operands.
allGepOperandsAvailable(const Instruction * I,const BasicBlock * HoistPt) const829   bool allGepOperandsAvailable(const Instruction *I,
830                                const BasicBlock *HoistPt) const {
831     for (const Use &Op : I->operands())
832       if (const auto *Inst = dyn_cast<Instruction>(&Op))
833         if (!DT->dominates(Inst->getParent(), HoistPt)) {
834           if (const GetElementPtrInst *GepOp =
835                   dyn_cast<GetElementPtrInst>(Inst)) {
836             if (!allGepOperandsAvailable(GepOp, HoistPt))
837               return false;
838             // Gep is available if all operands of GepOp are available.
839           } else {
840             // Gep is not available if it has operands other than GEPs that are
841             // defined in blocks not dominating HoistPt.
842             return false;
843           }
844         }
845     return true;
846   }
847 
848   // Make all operands of the GEP available.
makeGepsAvailable(Instruction * Repl,BasicBlock * HoistPt,const SmallVecInsn & InstructionsToHoist,Instruction * Gep) const849   void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
850                          const SmallVecInsn &InstructionsToHoist,
851                          Instruction *Gep) const {
852     assert(allGepOperandsAvailable(Gep, HoistPt) &&
853            "GEP operands not available");
854 
855     Instruction *ClonedGep = Gep->clone();
856     for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
857       if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
858         // Check whether the operand is already available.
859         if (DT->dominates(Op->getParent(), HoistPt))
860           continue;
861 
862         // As a GEP can refer to other GEPs, recursively make all the operands
863         // of this GEP available at HoistPt.
864         if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
865           makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
866       }
867 
868     // Copy Gep and replace its uses in Repl with ClonedGep.
869     ClonedGep->insertBefore(HoistPt->getTerminator());
870 
871     // Conservatively discard any optimization hints, they may differ on the
872     // other paths.
873     ClonedGep->dropUnknownNonDebugMetadata();
874 
875     // If we have optimization hints which agree with each other along different
876     // paths, preserve them.
877     for (const Instruction *OtherInst : InstructionsToHoist) {
878       const GetElementPtrInst *OtherGep;
879       if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
880         OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
881       else
882         OtherGep = cast<GetElementPtrInst>(
883             cast<StoreInst>(OtherInst)->getPointerOperand());
884       ClonedGep->andIRFlags(OtherGep);
885     }
886 
887     // Replace uses of Gep with ClonedGep in Repl.
888     Repl->replaceUsesOfWith(Gep, ClonedGep);
889   }
890 
updateAlignment(Instruction * I,Instruction * Repl)891   void updateAlignment(Instruction *I, Instruction *Repl) {
892     if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
893       ReplacementLoad->setAlignment(
894           std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
895       ++NumLoadsRemoved;
896     } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
897       ReplacementStore->setAlignment(std::min(ReplacementStore->getAlign(),
898                                               cast<StoreInst>(I)->getAlign()));
899       ++NumStoresRemoved;
900     } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
901       ReplacementAlloca->setAlignment(std::max(
902           ReplacementAlloca->getAlign(), cast<AllocaInst>(I)->getAlign()));
903     } else if (isa<CallInst>(Repl)) {
904       ++NumCallsRemoved;
905     }
906   }
907 
908   // Remove all the instructions in Candidates and replace their usage with Repl.
909   // Returns the number of instructions removed.
rauw(const SmallVecInsn & Candidates,Instruction * Repl,MemoryUseOrDef * NewMemAcc)910   unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
911                 MemoryUseOrDef *NewMemAcc) {
912     unsigned NR = 0;
913     for (Instruction *I : Candidates) {
914       if (I != Repl) {
915         ++NR;
916         updateAlignment(I, Repl);
917         if (NewMemAcc) {
918           // Update the uses of the old MSSA access with NewMemAcc.
919           MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
920           OldMA->replaceAllUsesWith(NewMemAcc);
921           MSSAUpdater->removeMemoryAccess(OldMA);
922         }
923 
924         Repl->andIRFlags(I);
925         combineKnownMetadata(Repl, I);
926         I->replaceAllUsesWith(Repl);
927         // Also invalidate the Alias Analysis cache.
928         MD->removeInstruction(I);
929         I->eraseFromParent();
930       }
931     }
932     return NR;
933   }
934 
935   // Replace all Memory PHI usage with NewMemAcc.
raMPHIuw(MemoryUseOrDef * NewMemAcc)936   void raMPHIuw(MemoryUseOrDef *NewMemAcc) {
937     SmallPtrSet<MemoryPhi *, 4> UsePhis;
938     for (User *U : NewMemAcc->users())
939       if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
940         UsePhis.insert(Phi);
941 
942     for (MemoryPhi *Phi : UsePhis) {
943       auto In = Phi->incoming_values();
944       if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
945         Phi->replaceAllUsesWith(NewMemAcc);
946         MSSAUpdater->removeMemoryAccess(Phi);
947       }
948     }
949   }
950 
951   // Remove all other instructions and replace them with Repl.
removeAndReplace(const SmallVecInsn & Candidates,Instruction * Repl,BasicBlock * DestBB,bool MoveAccess)952   unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
953                             BasicBlock *DestBB, bool MoveAccess) {
954     MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
955     if (MoveAccess && NewMemAcc) {
956         // The definition of this ld/st will not change: ld/st hoisting is
957         // legal when the ld/st is not moved past its current definition.
958         MSSAUpdater->moveToPlace(NewMemAcc, DestBB,
959                                  MemorySSA::BeforeTerminator);
960     }
961 
962     // Replace all other instructions with Repl with memory access NewMemAcc.
963     unsigned NR = rauw(Candidates, Repl, NewMemAcc);
964 
965     // Remove MemorySSA phi nodes with the same arguments.
966     if (NewMemAcc)
967       raMPHIuw(NewMemAcc);
968     return NR;
969   }
970 
971   // In the case Repl is a load or a store, we make all their GEPs
972   // available: GEPs are not hoisted by default to avoid the address
973   // computations to be hoisted without the associated load or store.
makeGepOperandsAvailable(Instruction * Repl,BasicBlock * HoistPt,const SmallVecInsn & InstructionsToHoist) const974   bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
975                                 const SmallVecInsn &InstructionsToHoist) const {
976     // Check whether the GEP of a ld/st can be synthesized at HoistPt.
977     GetElementPtrInst *Gep = nullptr;
978     Instruction *Val = nullptr;
979     if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
980       Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
981     } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
982       Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
983       Val = dyn_cast<Instruction>(St->getValueOperand());
984       // Check that the stored value is available.
985       if (Val) {
986         if (isa<GetElementPtrInst>(Val)) {
987           // Check whether we can compute the GEP at HoistPt.
988           if (!allGepOperandsAvailable(Val, HoistPt))
989             return false;
990         } else if (!DT->dominates(Val->getParent(), HoistPt))
991           return false;
992       }
993     }
994 
995     // Check whether we can compute the Gep at HoistPt.
996     if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
997       return false;
998 
999     makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
1000 
1001     if (Val && isa<GetElementPtrInst>(Val))
1002       makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
1003 
1004     return true;
1005   }
1006 
hoist(HoistingPointList & HPL)1007   std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
1008     unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
1009     for (const HoistingPointInfo &HP : HPL) {
1010       // Find out whether we already have one of the instructions in HoistPt,
1011       // in which case we do not have to move it.
1012       BasicBlock *DestBB = HP.first;
1013       const SmallVecInsn &InstructionsToHoist = HP.second;
1014       Instruction *Repl = nullptr;
1015       for (Instruction *I : InstructionsToHoist)
1016         if (I->getParent() == DestBB)
1017           // If there are two instructions in HoistPt to be hoisted in place:
1018           // update Repl to be the first one, such that we can rename the uses
1019           // of the second based on the first.
1020           if (!Repl || firstInBB(I, Repl))
1021             Repl = I;
1022 
1023       // Keep track of whether we moved the instruction so we know whether we
1024       // should move the MemoryAccess.
1025       bool MoveAccess = true;
1026       if (Repl) {
1027         // Repl is already in HoistPt: it remains in place.
1028         assert(allOperandsAvailable(Repl, DestBB) &&
1029                "instruction depends on operands that are not available");
1030         MoveAccess = false;
1031       } else {
1032         // When we do not find Repl in HoistPt, select the first in the list
1033         // and move it to HoistPt.
1034         Repl = InstructionsToHoist.front();
1035 
1036         // We can move Repl in HoistPt only when all operands are available.
1037         // The order in which hoistings are done may influence the availability
1038         // of operands.
1039         if (!allOperandsAvailable(Repl, DestBB)) {
1040           // When HoistingGeps there is nothing more we can do to make the
1041           // operands available: just continue.
1042           if (HoistingGeps)
1043             continue;
1044 
1045           // When not HoistingGeps we need to copy the GEPs.
1046           if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
1047             continue;
1048         }
1049 
1050         // Move the instruction at the end of HoistPt.
1051         Instruction *Last = DestBB->getTerminator();
1052         MD->removeInstruction(Repl);
1053         Repl->moveBefore(Last);
1054 
1055         DFSNumber[Repl] = DFSNumber[Last]++;
1056       }
1057 
1058       NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
1059 
1060       if (isa<LoadInst>(Repl))
1061         ++NL;
1062       else if (isa<StoreInst>(Repl))
1063         ++NS;
1064       else if (isa<CallInst>(Repl))
1065         ++NC;
1066       else // Scalar
1067         ++NI;
1068     }
1069 
1070     if (MSSA && VerifyMemorySSA)
1071       MSSA->verifyMemorySSA();
1072 
1073     NumHoisted += NL + NS + NC + NI;
1074     NumRemoved += NR;
1075     NumLoadsHoisted += NL;
1076     NumStoresHoisted += NS;
1077     NumCallsHoisted += NC;
1078     return {NI, NL + NC + NS};
1079   }
1080 
1081   // Hoist all expressions. Returns Number of scalars hoisted
1082   // and number of non-scalars hoisted.
hoistExpressions(Function & F)1083   std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
1084     InsnInfo II;
1085     LoadInfo LI;
1086     StoreInfo SI;
1087     CallInfo CI;
1088     for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
1089       int InstructionNb = 0;
1090       for (Instruction &I1 : *BB) {
1091         // If I1 cannot guarantee progress, subsequent instructions
1092         // in BB cannot be hoisted anyways.
1093         if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
1094           HoistBarrier.insert(BB);
1095           break;
1096         }
1097         // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
1098         // deeper may increase the register pressure and compilation time.
1099         if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
1100           break;
1101 
1102         // Do not value number terminator instructions.
1103         if (I1.isTerminator())
1104           break;
1105 
1106         if (auto *Load = dyn_cast<LoadInst>(&I1))
1107           LI.insert(Load, VN);
1108         else if (auto *Store = dyn_cast<StoreInst>(&I1))
1109           SI.insert(Store, VN);
1110         else if (auto *Call = dyn_cast<CallInst>(&I1)) {
1111           if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
1112             if (isa<DbgInfoIntrinsic>(Intr) ||
1113                 Intr->getIntrinsicID() == Intrinsic::assume ||
1114                 Intr->getIntrinsicID() == Intrinsic::sideeffect)
1115               continue;
1116           }
1117           if (Call->mayHaveSideEffects())
1118             break;
1119 
1120           if (Call->isConvergent())
1121             break;
1122 
1123           CI.insert(Call, VN);
1124         } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
1125           // Do not hoist scalars past calls that may write to memory because
1126           // that could result in spills later. geps are handled separately.
1127           // TODO: We can relax this for targets like AArch64 as they have more
1128           // registers than X86.
1129           II.insert(&I1, VN);
1130       }
1131     }
1132 
1133     HoistingPointList HPL;
1134     computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
1135     computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
1136     computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
1137     computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
1138     computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
1139     computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
1140     return hoist(HPL);
1141   }
1142 };
1143 
1144 class GVNHoistLegacyPass : public FunctionPass {
1145 public:
1146   static char ID;
1147 
GVNHoistLegacyPass()1148   GVNHoistLegacyPass() : FunctionPass(ID) {
1149     initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
1150   }
1151 
runOnFunction(Function & F)1152   bool runOnFunction(Function &F) override {
1153     if (skipFunction(F))
1154       return false;
1155     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1156     auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1157     auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1158     auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1159     auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
1160 
1161     GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
1162     return G.run(F);
1163   }
1164 
getAnalysisUsage(AnalysisUsage & AU) const1165   void getAnalysisUsage(AnalysisUsage &AU) const override {
1166     AU.addRequired<DominatorTreeWrapperPass>();
1167     AU.addRequired<PostDominatorTreeWrapperPass>();
1168     AU.addRequired<AAResultsWrapperPass>();
1169     AU.addRequired<MemoryDependenceWrapperPass>();
1170     AU.addRequired<MemorySSAWrapperPass>();
1171     AU.addPreserved<DominatorTreeWrapperPass>();
1172     AU.addPreserved<MemorySSAWrapperPass>();
1173     AU.addPreserved<GlobalsAAWrapperPass>();
1174     AU.addPreserved<AAResultsWrapperPass>();
1175   }
1176 };
1177 
1178 } // end namespace llvm
1179 
run(Function & F,FunctionAnalysisManager & AM)1180 PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
1181   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1182   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1183   AliasAnalysis &AA = AM.getResult<AAManager>(F);
1184   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
1185   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
1186   GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
1187   if (!G.run(F))
1188     return PreservedAnalyses::all();
1189 
1190   PreservedAnalyses PA;
1191   PA.preserve<DominatorTreeAnalysis>();
1192   PA.preserve<MemorySSAAnalysis>();
1193   PA.preserve<GlobalsAA>();
1194   return PA;
1195 }
1196 
// Definition of the static pass identifier that the GVNHoistLegacyPass
// constructor hands to its FunctionPass base class.
1197 char GVNHoistLegacyPass::ID = 0;
1198 
// Registration of GVNHoistLegacyPass under the argument name "gvn-hoist"
// with the description "Early GVN Hoisting of Expressions". The dependencies
// listed between BEGIN and END are the same five wrapper passes that
// getAnalysisUsage() marks as required.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of the macro; confirm against the macro
// definition.
1199 INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
1200                       "Early GVN Hoisting of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)1201 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
1202 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1203 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1204 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
1205 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1206 INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
1207                     "Early GVN Hoisting of Expressions", false, false)
1208 
1209 FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
1210