1 //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists expressions from branches to a common dominator. It uses
10 // GVN (global value numbering) to discover expressions computing the same
11 // values. The primary goals of code-hoisting are:
12 // 1. To reduce the code size.
13 // 2. In some cases, to reduce the critical path (by exposing more ILP).
14 //
15 // The algorithm factors out the reachability of values so that repeated
16 // reachability queries are fast. This is based on finding the
17 // ANTIC points in the CFG which do not change during hoisting. The ANTIC points
18 // are basically the dominance-frontiers in the inverse graph. So we introduce a
19 // data structure (CHI nodes) to keep track of values flowing out of a basic
20 // block. We only do this for values with multiple occurrences in the function
21 // as they are the potential hoistable candidates. This approach allows us to
22 // hoist instructions to a basic block with more than two successors, as well as
23 // deal with infinite loops in a trivial way.
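//
// As an illustration (not taken from an actual test), when both branch targets
// compute the same value, e.g.
//   if.then:  %t = add i32 %a, %b        if.else:  %u = add i32 %a, %b
// and the addition is anticipable at their common dominator, a single add is
// placed in that dominator and both original instructions are replaced by it.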
24 //
25 // Limitations: This pass does not hoist fully redundant expressions because
26 // they are already handled by GVN-PRE. It is advisable to run gvn-hoist before
27 // and after gvn-pre because gvn-pre creates opportunities for more instructions
28 // to be hoisted.
29 //
30 // Hoisting may hurt performance in some cases. To mitigate that, hoisting is
31 // disabled in the following cases:
32 // 1. Scalars across calls.
33 // 2. GEPs when the corresponding load/store cannot be hoisted.
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/DenseMap.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
42 #include "llvm/ADT/iterator_range.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/GlobalsModRef.h"
45 #include "llvm/Analysis/IteratedDominanceFrontier.h"
46 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
47 #include "llvm/Analysis/MemorySSA.h"
48 #include "llvm/Analysis/MemorySSAUpdater.h"
49 #include "llvm/Analysis/PostDominators.h"
50 #include "llvm/Analysis/ValueTracking.h"
51 #include "llvm/IR/Argument.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Debug.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Scalar.h"
74 #include "llvm/Transforms/Scalar/GVN.h"
75 #include "llvm/Transforms/Utils/Local.h"
76 #include <algorithm>
77 #include <cassert>
78 #include <iterator>
79 #include <memory>
80 #include <utility>
81 #include <vector>
82 
83 using namespace llvm;
84 
85 #define DEBUG_TYPE "gvn-hoist"
86 
87 STATISTIC(NumHoisted, "Number of instructions hoisted");
88 STATISTIC(NumRemoved, "Number of instructions removed");
89 STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
90 STATISTIC(NumLoadsRemoved, "Number of loads removed");
91 STATISTIC(NumStoresHoisted, "Number of stores hoisted");
92 STATISTIC(NumStoresRemoved, "Number of stores removed");
93 STATISTIC(NumCallsHoisted, "Number of calls hoisted");
94 STATISTIC(NumCallsRemoved, "Number of calls removed");
95 
96 static cl::opt<int>
97     MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
98                         cl::desc("Max number of instructions to hoist "
99                                  "(default unlimited = -1)"));
100 
101 static cl::opt<int> MaxNumberOfBBSInPath(
102     "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
103     cl::desc("Max number of basic blocks on the path between "
104              "hoisting locations (default = 4, unlimited = -1)"));
105 
106 static cl::opt<int> MaxDepthInBB(
107     "gvn-hoist-max-depth", cl::Hidden, cl::init(100),
108     cl::desc("Hoist instructions from the beginning of the BB up to the "
109              "maximum specified depth (default = 100, unlimited = -1)"));
110 
111 static cl::opt<int>
112     MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
113                    cl::desc("Maximum length of dependent chains to hoist "
114                             "(default = 10, unlimited = -1)"));
115 
116 namespace llvm {
117 
118 using BBSideEffectsSet = DenseMap<const BasicBlock *, bool>;
119 using SmallVecInsn = SmallVector<Instruction *, 4>;
120 using SmallVecImplInsn = SmallVectorImpl<Instruction *>;
121 
122 // Each element of a hoisting list contains the basic block to hoist into and a
123 // list of instructions to be hoisted.
124 using HoistingPointInfo = std::pair<BasicBlock *, SmallVecInsn>;
125 
126 using HoistingPointList = SmallVector<HoistingPointInfo, 4>;
127 
128 // A map from a pair of VNs to all the instructions with those VNs.
129 using VNType = std::pair<unsigned, unsigned>;
130 
131 using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>;
132 
133 // CHI keeps information about values flowing out of a basic block.  It is
134 // similar to PHI but in the inverse graph, and used for outgoing values on each
135 // edge. For conciseness, it is computed only for instructions with multiple
136 // occurrences in the CFG because they are the only hoistable candidates.
137 //     A (CHI[{V, B, I1}, {V, C, I2}])
138 //  /     \
139 // /       \
140 // B(I1)  C (I2)
141 // The value number for both I1 and I2 is V; the CHI node records the
142 // instruction as well as the edge along which the value flows.
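// In the diagram above, the CHI in A carries two arguments: {V, B, I1} records
// that I1 (located in B) provides value V along the edge A -> B, and
// {V, C, I2} does the same for the edge A -> C.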
143 struct CHIArg {
144   VNType VN;
145 
146   // Edge destination (shows the direction of flow); it may not be where I is.
147   BasicBlock *Dest;
148 
149   // The instruction (VN) which uses the values flowing out of CHI.
150   Instruction *I;
151 
152   bool operator==(const CHIArg &A) { return VN == A.VN; }
153   bool operator!=(const CHIArg &A) { return !(*this == A); }
154 };
155 
156 using CHIIt = SmallVectorImpl<CHIArg>::iterator;
157 using CHIArgs = iterator_range<CHIIt>;
158 using OutValuesType = DenseMap<BasicBlock *, SmallVector<CHIArg, 2>>;
159 using InValuesType =
160     DenseMap<BasicBlock *, SmallVector<std::pair<VNType, Instruction *>, 2>>;
161 
162 // An invalid value number, used when inserting a single value number into
163 // VNtoInsns.
164 enum : unsigned { InvalidVN = ~2U };
165 
166 // Records all scalar instructions that are candidates for code hoisting.
167 class InsnInfo {
168   VNtoInsns VNtoScalars;
169 
170 public:
171   // Inserts I and its value number in VNtoScalars.
172   void insert(Instruction *I, GVN::ValueTable &VN) {
173     // Scalar instruction.
174     unsigned V = VN.lookupOrAdd(I);
175     VNtoScalars[{V, InvalidVN}].push_back(I);
176   }
177 
178   const VNtoInsns &getVNTable() const { return VNtoScalars; }
179 };
180 
181 // Records all load instructions that are candidates for code hoisting.
182 class LoadInfo {
183   VNtoInsns VNtoLoads;
184 
185 public:
186   // Insert Load and the value number of its memory address in VNtoLoads.
187   void insert(LoadInst *Load, GVN::ValueTable &VN) {
188     if (Load->isSimple()) {
189       unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
190       VNtoLoads[{V, InvalidVN}].push_back(Load);
191     }
192   }
193 
194   const VNtoInsns &getVNTable() const { return VNtoLoads; }
195 };
196 
197 // Records all store instructions that are candidates for code hoisting.
198 class StoreInfo {
199   VNtoInsns VNtoStores;
200 
201 public:
202   // Insert the Store and a hash number of the store address and the stored
203   // value in VNtoStores.
204   void insert(StoreInst *Store, GVN::ValueTable &VN) {
205     if (!Store->isSimple())
206       return;
207     // Hash the store address and the stored value.
208     Value *Ptr = Store->getPointerOperand();
209     Value *Val = Store->getValueOperand();
210     VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store);
211   }
212 
213   const VNtoInsns &getVNTable() const { return VNtoStores; }
214 };
215 
216 // Records all call instructions that are candidates for code hoisting.
217 class CallInfo {
218   VNtoInsns VNtoCallsScalars;
219   VNtoInsns VNtoCallsLoads;
220   VNtoInsns VNtoCallsStores;
221 
222 public:
223   // Insert Call and its value numbering in one of the VNtoCalls* containers.
224   void insert(CallInst *Call, GVN::ValueTable &VN) {
225     // A call that doesNotAccessMemory is handled as a Scalar, a call that
226     // onlyReadsMemory is handled as a Load instruction, and all other
227     // calls are handled as Stores.
228     unsigned V = VN.lookupOrAdd(Call);
229     auto Entry = std::make_pair(V, InvalidVN);
230 
231     if (Call->doesNotAccessMemory())
232       VNtoCallsScalars[Entry].push_back(Call);
233     else if (Call->onlyReadsMemory())
234       VNtoCallsLoads[Entry].push_back(Call);
235     else
236       VNtoCallsStores[Entry].push_back(Call);
237   }
238 
239   const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
240   const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
241   const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
242 };
243 
244 static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
245   static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
246                                       LLVMContext::MD_alias_scope,
247                                       LLVMContext::MD_noalias,
248                                       LLVMContext::MD_range,
249                                       LLVMContext::MD_fpmath,
250                                       LLVMContext::MD_invariant_load,
251                                       LLVMContext::MD_invariant_group,
252                                       LLVMContext::MD_access_group};
253   combineMetadata(ReplInst, I, KnownIDs, true);
254 }
255 
256 // This pass hoists common computations across branches sharing common
257 // dominator. The primary goal is to reduce the code size, and in some
258 // cases reduce critical path (by exposing more ILP).
259 class GVNHoist {
260 public:
261   GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
262            MemoryDependenceResults *MD, MemorySSA *MSSA)
263       : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
264         MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
265 
266   bool run(Function &F);
267 
268   // Copied from NewGVN.cpp
269   // This function provides global ranking of operations so that we can place
270   // them in a canonical order.  Note that rank alone is not necessarily enough
271   // for a complete ordering, as constants all have the same rank.  However,
272   // generally, we will simplify an operation with all constants so that it
273   // doesn't matter what order they appear in.
274   unsigned int rank(const Value *V) const;
275 
276 private:
277   GVN::ValueTable VN;
278   DominatorTree *DT;
279   PostDominatorTree *PDT;
280   AliasAnalysis *AA;
281   MemoryDependenceResults *MD;
282   MemorySSA *MSSA;
283   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
284   DenseMap<const Value *, unsigned> DFSNumber;
285   BBSideEffectsSet BBSideEffects;
286   DenseSet<const BasicBlock *> HoistBarrier;
287   SmallVector<BasicBlock *, 32> IDFBlocks;
288   unsigned NumFuncArgs;
289   const bool HoistingGeps = false;
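  // GEP hoisting stays disabled: instead of hoisting a GEP itself, the GEP is
  // cloned at the hoisting point when the load or store using it is hoisted
  // (see makeGepOperandsAvailable).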
290 
291   enum InsKind { Unknown, Scalar, Load, Store };
292 
293   // Return true when there is exception handling in BB.
294   bool hasEH(const BasicBlock *BB);
295 
296   // Return true when a successor of BB dominates A.
297   bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
298     for (const BasicBlock *Succ : successors(BB))
299       if (DT->dominates(Succ, A))
300         return true;
301 
302     return false;
303   }
304 
305   // Return true when I1 appears before I2 in the instructions of BB.
306   bool firstInBB(const Instruction *I1, const Instruction *I2) {
307     assert(I1->getParent() == I2->getParent());
308     unsigned I1DFS = DFSNumber.lookup(I1);
309     unsigned I2DFS = DFSNumber.lookup(I2);
310     assert(I1DFS && I2DFS);
311     return I1DFS < I2DFS;
312   }
313 
314   // Return true when there are memory uses of Def in BB.
315   bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
316                     const BasicBlock *BB);
317 
318   bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
319                    int &NBBsOnAllPaths);
320 
321   // Return true when there is exception handling or a load of memory Def
322   // between Def and NewPt.  This function is only called for stores: Def is
323   // the MemoryDef of the store to be hoisted.
324 
325   // Decrement NBBsOnAllPaths by 1 for each block between HoistPt and BB, and
326   // return true when the counter NBBsOnAllPaths reaches 0, except when it is
327   // initialized to -1, which means unlimited.
328   bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
329                           int &NBBsOnAllPaths);
330 
331   // Return true when there is exception handling between HoistPt and BB.
332   // Decrement NBBsOnAllPaths by 1 for each block between HoistPt and BB, and
333   // return true when the counter NBBsOnAllPaths reaches 0, except when it is
334   // initialized to -1, which means unlimited.
335   bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
336                    int &NBBsOnAllPaths);
337 
338   // Return true when it is safe to hoist a memory load or store U from OldPt
339   // to NewPt.
340   bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
341                        MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths);
342 
343   // Return true when it is safe to hoist scalar instructions from all blocks in
344   // WL to HoistBB.
345   bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB,
346                          int &NBBsOnAllPaths) {
347     return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths);
348   }
349 
350   // In the inverse CFG, the dominance frontier of basic block (BB) is the
351   // point where ANTIC needs to be computed for instructions which are going
352   // to be hoisted. Since this point does not change during gvn-hoist,
353   // we compute it only once (on demand).
354   // The idea is inspired by:
355   // "Partial Redundancy Elimination in SSA Form"
356   // ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW
357   // They use a similar idea in the forward graph to find fully redundant and
358   // partially redundant expressions; here it is used in the inverse graph to
359   // find fully anticipable instructions at the merge point (post-dominator in
360   // the inverse CFG).
361   // Returns the edge via which an instruction in BB will get the values.
362 
363   // Returns true when the values are flowing out to each edge.
364   bool valueAnticipable(CHIArgs C, Instruction *TI) const;
365 
366   // Check if it is safe to hoist values tracked by CHI in the range
367   // [Begin, End) and accumulate them in Safe.
368   void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K,
369                    SmallVectorImpl<CHIArg> &Safe);
370 
371   using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>;
372 
373   // Push all the VNs corresponding to BB into RenameStack.
374   void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
375                        RenameStackType &RenameStack);
376 
377   void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
378                    RenameStackType &RenameStack);
379 
380   // Walk the post-dominator tree top-down and use a stack for each value to
381   // store the last value you see. When you hit a CHI from a given edge, the
382   // value to use as the argument is at the top of the stack; add that value
383   // to the CHI and pop.
384   void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) {
385     auto Root = PDT->getNode(nullptr);
386     if (!Root)
387       return;
388     // Depth first walk on PDom tree to fill the CHIargs at each PDF.
389     RenameStackType RenameStack;
390     for (auto Node : depth_first(Root)) {
391       BasicBlock *BB = Node->getBlock();
392       if (!BB)
393         continue;
394 
395       // Collect all values in BB and push to stack.
396       fillRenameStack(BB, ValueBBs, RenameStack);
397 
398       // Fill outgoing values in each CHI corresponding to BB.
399       fillChiArgs(BB, CHIBBs, RenameStack);
400     }
401   }
402 
403   // Walk all the CHI-nodes to find the ones which have an empty entry and
404   // remove them. Then collect all the instructions which are safe to hoist and
405   // see if they form a list of anticipable values. OutValues contains CHIs
406   // corresponding to each basic block.
407   void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K,
408                                HoistingPointList &HPL);
409 
410   // Compute insertion points for each value which can be fully anticipated at
411   // a dominator. HPL contains all such values.
412   void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
413                               InsKind K) {
414     // Sort VNs based on their rankings
415     std::vector<VNType> Ranks;
416     for (const auto &Entry : Map) {
417       Ranks.push_back(Entry.first);
418     }
419 
420     // TODO: Remove fully-redundant expressions.
421     // Get the instruction from the Map; assume that all instructions with the
422     // same VN have the same rank (this is an approximation).
423     llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
424       return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
425     });
426 
427     // - Sort VNs according to their rank, and start with lowest ranked VN
428     // - Take a VN and for each instruction with same VN
429     //   - Find the dominance frontier in the inverse graph (PDF)
430     //   - Insert the chi-node at PDF
431     // - Remove the chi-nodes with missing entries
432     // - Remove values from CHI-nodes which do not truly flow out, e.g.,
433     //   modified along the path.
434     // - Collect the remaining values that are still anticipable
435     SmallVector<BasicBlock *, 2> IDFBlocks;
436     ReverseIDFCalculator IDFs(*PDT);
437     OutValuesType OutValue;
438     InValuesType InValue;
439     for (const auto &R : Ranks) {
440       const SmallVecInsn &V = Map.lookup(R);
441       if (V.size() < 2)
442         continue;
443       const VNType &VN = R;
444       SmallPtrSet<BasicBlock *, 2> VNBlocks;
445       for (auto &I : V) {
446         BasicBlock *BBI = I->getParent();
447         if (!hasEH(BBI))
448           VNBlocks.insert(BBI);
449       }
450       // Compute the Post-Dominance Frontier (PDF) of each basic block.
451       // The dominance frontier of a block X in the reverse
452       // control-flow graph is the set of blocks upon which X is control
453       // dependent. The following sequence computes the set of blocks
454       // where a CHI node may be needed for the current VN, i.e., the
455       // iterated PDF of the blocks defining this VN.
456       IDFs.setDefiningBlocks(VNBlocks);
457       IDFBlocks.clear();
458       IDFs.calculate(IDFBlocks);
459 
460       // Make a map of BB vs instructions to be hoisted.
461       for (unsigned i = 0; i < V.size(); ++i) {
462         InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i]));
463       }
464       // Insert empty CHI node for this VN. This is used to factor out
465       // basic blocks where the ANTIC can potentially change.
466       CHIArg EmptyChi = {VN, nullptr, nullptr};
467       for (auto *IDFBB : IDFBlocks) {
468         for (unsigned i = 0; i < V.size(); ++i) {
469           // Ignore spurious PDFs.
470           if (DT->properlyDominates(IDFBB, V[i]->getParent())) {
471             OutValue[IDFBB].push_back(EmptyChi);
472             LLVM_DEBUG(dbgs() << "\nInserting a CHI for BB: "
473                               << IDFBB->getName() << ", for Insn: " << *V[i]);
474           }
475         }
476       }
477     }
478 
479     // Insert CHI args at each PDF to iterate on factored graph of
480     // control dependence.
481     insertCHI(InValue, OutValue);
482     // Using the CHI args inserted at each PDF, find fully anticipable values.
483     findHoistableCandidates(OutValue, K, HPL);
484   }
485 
486   // Return true when all operands of Instr are available at insertion point
487   // HoistPt. When limiting the number of hoisted expressions, one could hoist
488   // a load without hoisting its access function. So before hoisting any
489   // expression, make sure that all its operands are available at insert point.
490   bool allOperandsAvailable(const Instruction *I,
491                             const BasicBlock *HoistPt) const;
492 
493   // Same as allOperandsAvailable with recursive check for GEP operands.
494   bool allGepOperandsAvailable(const Instruction *I,
495                                const BasicBlock *HoistPt) const;
496 
497   // Make all operands of the GEP available.
498   void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
499                          const SmallVecInsn &InstructionsToHoist,
500                          Instruction *Gep) const;
501 
502   void updateAlignment(Instruction *I, Instruction *Repl);
503 
504   // Remove all the instructions in Candidates and replace their usage with
505   // Repl. Returns the number of instructions removed.
506   unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl,
507                 MemoryUseOrDef *NewMemAcc);
508 
509   // Replace all Memory PHI usage with NewMemAcc.
510   void raMPHIuw(MemoryUseOrDef *NewMemAcc);
511 
512   // Remove all other instructions and replace them with Repl.
513   unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl,
514                             BasicBlock *DestBB, bool MoveAccess);
515 
516   // In the case Repl is a load or a store, we make all its GEPs
517   // available: GEPs are not hoisted by default to avoid hoisting the
518   // address computation without the associated load or store.
519   bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
520                                 const SmallVecInsn &InstructionsToHoist) const;
521 
522   std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL);
523 
524   // Hoist all expressions. Returns the number of scalars hoisted and the
525   // number of non-scalars hoisted.
526   std::pair<unsigned, unsigned> hoistExpressions(Function &F);
527 };
528 
529 class GVNHoistLegacyPass : public FunctionPass {
530 public:
531   static char ID;
532 
533   GVNHoistLegacyPass() : FunctionPass(ID) {
534     initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
535   }
536 
537   bool runOnFunction(Function &F) override {
538     if (skipFunction(F))
539       return false;
540     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
541     auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
542     auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
543     auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
544     auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
545 
546     GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
547     return G.run(F);
548   }
549 
550   void getAnalysisUsage(AnalysisUsage &AU) const override {
551     AU.addRequired<DominatorTreeWrapperPass>();
552     AU.addRequired<PostDominatorTreeWrapperPass>();
553     AU.addRequired<AAResultsWrapperPass>();
554     AU.addRequired<MemoryDependenceWrapperPass>();
555     AU.addRequired<MemorySSAWrapperPass>();
556     AU.addPreserved<DominatorTreeWrapperPass>();
557     AU.addPreserved<MemorySSAWrapperPass>();
558     AU.addPreserved<GlobalsAAWrapperPass>();
559     AU.addPreserved<AAResultsWrapperPass>();
560   }
561 };
562 
563 bool GVNHoist::run(Function &F) {
564   NumFuncArgs = F.arg_size();
565   VN.setDomTree(DT);
566   VN.setAliasAnalysis(AA);
567   VN.setMemDep(MD);
568   bool Res = false;
569   // Perform DFS Numbering of instructions.
570   unsigned BBI = 0;
571   for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
572     DFSNumber[BB] = ++BBI;
573     unsigned I = 0;
574     for (auto &Inst : *BB)
575       DFSNumber[&Inst] = ++I;
576   }
577 
578   int ChainLength = 0;
579 
580   // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
581   while (true) {
582     if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
583       return Res;
584 
585     auto HoistStat = hoistExpressions(F);
586     if (HoistStat.first + HoistStat.second == 0)
587       return Res;
588 
589     if (HoistStat.second > 0)
590       // To address a limitation of the current GVN, we need to rerun the
591       // hoisting after we hoisted loads or stores in order to be able to
592       // hoist all scalars dependent on the hoisted ld/st.
593       VN.clear();
594 
595     Res = true;
596   }
597 
598   return Res;
599 }
600 
601 unsigned int GVNHoist::rank(const Value *V) const {
602   // Prefer constants to undef to anything else.
603   // Undef is a constant, so it has to be checked first.
604   // Prefer smaller constants to constantexprs.
605   if (isa<ConstantExpr>(V))
606     return 2;
607   if (isa<UndefValue>(V))
608     return 1;
609   if (isa<Constant>(V))
610     return 0;
611   else if (auto *A = dyn_cast<Argument>(V))
612     return 3 + A->getArgNo();
613 
614   // Need to shift the instruction DFS number past the constant and argument
615   // ranks assigned above.
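  // For example (illustrative values), with NumFuncArgs == 2: plain constants
  // rank 0, undef ranks 1, constant expressions rank 2, the two arguments rank
  // 3 and 4, and instructions rank 6 plus their DFS number within their block.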
616   auto Result = DFSNumber.lookup(V);
617   if (Result > 0)
618     return 4 + NumFuncArgs + Result;
619   // Unreachable or something else, just return a really large number.
620   return ~0;
621 }
622 
623 bool GVNHoist::hasEH(const BasicBlock *BB) {
624   auto It = BBSideEffects.find(BB);
625   if (It != BBSideEffects.end())
626     return It->second;
627 
628   if (BB->isEHPad() || BB->hasAddressTaken()) {
629     BBSideEffects[BB] = true;
630     return true;
631   }
632 
633   if (BB->getTerminator()->mayThrow()) {
634     BBSideEffects[BB] = true;
635     return true;
636   }
637 
638   BBSideEffects[BB] = false;
639   return false;
640 }
641 
642 bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
643                             const BasicBlock *BB) {
644   const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
645   if (!Acc)
646     return false;
647 
648   Instruction *OldPt = Def->getMemoryInst();
649   const BasicBlock *OldBB = OldPt->getParent();
650   const BasicBlock *NewBB = NewPt->getParent();
651   bool ReachedNewPt = false;
652 
653   for (const MemoryAccess &MA : *Acc)
654     if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
655       Instruction *Insn = MU->getMemoryInst();
656 
657       // Do not check whether MU aliases Def when MU occurs after OldPt.
658       if (BB == OldBB && firstInBB(OldPt, Insn))
659         break;
660 
661       // Do not check whether MU aliases Def when MU occurs before NewPt.
662       if (BB == NewBB) {
663         if (!ReachedNewPt) {
664           if (firstInBB(Insn, NewPt))
665             continue;
666           ReachedNewPt = true;
667         }
668       }
669       if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
670         return true;
671     }
672 
673   return false;
674 }
675 
676 bool GVNHoist::hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB,
677                            int &NBBsOnAllPaths) {
678   // Stop walk once the limit is reached.
679   if (NBBsOnAllPaths == 0)
680     return true;
681 
682   // Impossible to hoist with exceptions on the path.
683   if (hasEH(BB))
684     return true;
685 
686   // No instruction after a hoist barrier in a basic block is ever selected
687   // for hoisting, so instructions selected within a basic block that contains
688   // a hoist barrier can still be hoisted.
689   if ((BB != SrcBB) && HoistBarrier.count(BB))
690     return true;
691 
692   return false;
693 }
694 
695 bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
696                                   int &NBBsOnAllPaths) {
697   const BasicBlock *NewBB = NewPt->getParent();
698   const BasicBlock *OldBB = Def->getBlock();
699   assert(DT->dominates(NewBB, OldBB) && "invalid path");
700   assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
701          "def does not dominate new hoisting point");
702 
703   // Walk all basic blocks reachable in depth-first iteration on the inverse
704   // CFG from OldBB to NewBB. These blocks are all the blocks that may be
705   // executed between the execution of NewBB and OldBB. Hoisting an expression
706   // from OldBB into NewBB has to be safe on all execution paths.
707   for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
708     const BasicBlock *BB = *I;
709     if (BB == NewBB) {
710       // Stop traversal when reaching HoistPt.
711       I.skipChildren();
712       continue;
713     }
714 
715     if (hasEHhelper(BB, OldBB, NBBsOnAllPaths))
716       return true;
717 
718     // Check that we do not move a store past loads.
719     if (hasMemoryUse(NewPt, Def, BB))
720       return true;
721 
722     // -1 is unlimited number of blocks on all paths.
723     if (NBBsOnAllPaths != -1)
724       --NBBsOnAllPaths;
725 
726     ++I;
727   }
728 
729   return false;
730 }
731 
732 bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
733                            int &NBBsOnAllPaths) {
734   assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
735 
736   // Walk all basic blocks reachable in depth-first iteration on
737   // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
738   // blocks that may be executed between the execution of NewHoistPt and
739   // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
740   // on all execution paths.
741   for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
742     const BasicBlock *BB = *I;
743     if (BB == HoistPt) {
744       // Stop traversal when reaching NewHoistPt.
745       I.skipChildren();
746       continue;
747     }
748 
749     if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths))
750       return true;
751 
752     // -1 is unlimited number of blocks on all paths.
753     if (NBBsOnAllPaths != -1)
754       --NBBsOnAllPaths;
755 
756     ++I;
757   }
758 
759   return false;
760 }
761 
762 bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt,
763                                const Instruction *OldPt, MemoryUseOrDef *U,
764                                GVNHoist::InsKind K, int &NBBsOnAllPaths) {
765   // In place hoisting is safe.
766   if (NewPt == OldPt)
767     return true;
768 
769   const BasicBlock *NewBB = NewPt->getParent();
770   const BasicBlock *OldBB = OldPt->getParent();
771   const BasicBlock *UBB = U->getBlock();
772 
773   // Check for dependences on the Memory SSA.
774   MemoryAccess *D = U->getDefiningAccess();
775   BasicBlock *DBB = D->getBlock();
776   if (DT->properlyDominates(NewBB, DBB))
777     // Cannot move the load or store to NewBB above its definition in DBB.
778     return false;
779 
780   if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
781     if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
782       if (!firstInBB(UD->getMemoryInst(), NewPt))
783         // Cannot move the load or store to NewPt above its definition in D.
784         return false;
785 
786   // Check for unsafe hoistings due to side effects.
787   if (K == InsKind::Store) {
788     if (hasEHOrLoadsOnPath(NewPt, cast<MemoryDef>(U), NBBsOnAllPaths))
789       return false;
790   } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
791     return false;
792 
793   if (UBB == NewBB) {
794     if (DT->properlyDominates(DBB, NewBB))
795       return true;
796     assert(UBB == DBB);
797     assert(MSSA->locallyDominates(D, U));
798   }
799 
800   // No side effects: it is safe to hoist.
801   return true;
802 }
803 
804 bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
805   if (TI->getNumSuccessors() > (unsigned)size(C))
806     return false; // Not enough args in this CHI.
807 
808   for (auto CHI : C) {
809     BasicBlock *Dest = CHI.Dest;
810     // Find if all the edges have values flowing out of BB.
811     bool Found = llvm::any_of(
812         successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
813     if (!Found)
814       return false;
815   }
816   return true;
817 }
818 
819 void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K,
820                            SmallVectorImpl<CHIArg> &Safe) {
821   int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
822   for (auto CHI : C) {
823     Instruction *Insn = CHI.I;
824     if (!Insn) // No instruction was inserted in this CHI.
825       continue;
826     if (K == InsKind::Scalar) {
827       if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
828         Safe.push_back(CHI);
829     } else {
830       auto *T = BB->getTerminator();
831       if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn))
832         if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths))
833           Safe.push_back(CHI);
834     }
835   }
836 }
837 
838 void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
839                                GVNHoist::RenameStackType &RenameStack) {
840   auto it1 = ValueBBs.find(BB);
841   if (it1 != ValueBBs.end()) {
842     // Iterate in reverse order to keep lower ranked values on the top.
843     for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
844       // Get the value of instruction I
845       LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
846       RenameStack[VI.first].push_back(VI.second);
847     }
848   }
849 }
850 
851 void GVNHoist::fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs,
852                            GVNHoist::RenameStackType &RenameStack) {
853   // For each *predecessor* (because Post-DOM) of BB, check if it has a CHI.
854   for (auto Pred : predecessors(BB)) {
855     auto P = CHIBBs.find(Pred);
856     if (P == CHIBBs.end()) {
857       continue;
858     }
859     LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName(););
860     // A CHI is found (BB -> Pred is an edge in the CFG)
861     // Pop the stack until Top(V) = Ve.
862     auto &VCHI = P->second;
863     for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) {
864       CHIArg &C = *It;
865       if (!C.Dest) {
866         auto si = RenameStack.find(C.VN);
867         // The basic block containing the CHI must dominate the block of the
868         // value we want to track in the CHI. In the PDom walk, the stack can
869         // hold values which are not control dependent, e.g., in a nested loop.
870         if (si != RenameStack.end() && si->second.size() &&
871             DT->properlyDominates(Pred, si->second.back()->getParent())) {
872           C.Dest = BB;                     // Assign the edge
873           C.I = si->second.pop_back_val(); // Assign the argument
874           LLVM_DEBUG(dbgs()
875                      << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I
876                      << ", VN: " << C.VN.first << ", " << C.VN.second);
877         }
878         // Move to next CHI of a different value
879         It = std::find_if(It, VCHI.end(), [It](CHIArg &A) { return A != *It; });
880       } else
881         ++It;
882     }
883   }
884 }
885 
886 void GVNHoist::findHoistableCandidates(OutValuesType &CHIBBs,
887                                        GVNHoist::InsKind K,
888                                        HoistingPointList &HPL) {
889   auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; };
890 
891   // CHIArgs now have the outgoing values, so check for anticipability and
892   // accumulate hoistable candidates in HPL.
893   for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) {
894     BasicBlock *BB = A.first;
895     SmallVectorImpl<CHIArg> &CHIs = A.second;
896     // The vector of CHIs contains CHIs for different instructions.
897     // Sort the args according to their VNs, such that identical
898     // instructions are together.
899     llvm::stable_sort(CHIs, cmpVN);
900     auto TI = BB->getTerminator();
901     auto B = CHIs.begin();
902     // [PrevIt, PHIIt) form a range of CHIs which have identical VNs.
903     auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(),
904                               [B](CHIArg &A) { return A != *B; });
905     auto PrevIt = CHIs.begin();
906     while (PrevIt != PHIIt) {
907       // Collect values which satisfy safety checks.
908       SmallVector<CHIArg, 2> Safe;
909       // We check for safety first because there might be multiple values in
910       // the same path, some of which are not safe to be hoisted, but overall
911       // each edge has at least one value which can be hoisted, making the
912       // value anticipable along that path.
913       checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe);
914 
915       // List of safe values should be anticipable at TI.
916       if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) {
917         HPL.push_back({BB, SmallVecInsn()});
918         SmallVecInsn &V = HPL.back().second;
919         for (auto B : Safe)
920           V.push_back(B.I);
921       }
922 
923       // Check other VNs
924       PrevIt = PHIIt;
925       PHIIt = std::find_if(PrevIt, CHIs.end(),
926                            [PrevIt](CHIArg &A) { return A != *PrevIt; });
927     }
928   }
929 }
930 
931 bool GVNHoist::allOperandsAvailable(const Instruction *I,
932                                     const BasicBlock *HoistPt) const {
933   for (const Use &Op : I->operands())
934     if (const auto *Inst = dyn_cast<Instruction>(&Op))
935       if (!DT->dominates(Inst->getParent(), HoistPt))
936         return false;
937 
938   return true;
939 }
940 
941 bool GVNHoist::allGepOperandsAvailable(const Instruction *I,
942                                        const BasicBlock *HoistPt) const {
943   for (const Use &Op : I->operands())
944     if (const auto *Inst = dyn_cast<Instruction>(&Op))
945       if (!DT->dominates(Inst->getParent(), HoistPt)) {
946         if (const GetElementPtrInst *GepOp =
947                 dyn_cast<GetElementPtrInst>(Inst)) {
948           if (!allGepOperandsAvailable(GepOp, HoistPt))
949             return false;
950           // Gep is available if all operands of GepOp are available.
951         } else {
952           // Gep is not available if it has operands other than GEPs that are
953           // defined in blocks not dominating HoistPt.
954           return false;
955         }
956       }
957   return true;
958 }
959 
960 void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
961                                  const SmallVecInsn &InstructionsToHoist,
962                                  Instruction *Gep) const {
963   assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
964 
965   Instruction *ClonedGep = Gep->clone();
966   for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
967     if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
968       // Check whether the operand is already available.
969       if (DT->dominates(Op->getParent(), HoistPt))
970         continue;
971 
972       // As a GEP can refer to other GEPs, recursively make all the operands
973       // of this GEP available at HoistPt.
974       if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
975         makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
976     }
977 
978   // Copy Gep and replace its uses in Repl with ClonedGep.
979   ClonedGep->insertBefore(HoistPt->getTerminator());
980 
981   // Conservatively discard any optimization hints, they may differ on the
982   // other paths.
983   ClonedGep->dropUnknownNonDebugMetadata();
984 
985   // If we have optimization hints which agree with each other along different
986   // paths, preserve them.
987   for (const Instruction *OtherInst : InstructionsToHoist) {
988     const GetElementPtrInst *OtherGep;
989     if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
990       OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
991     else
992       OtherGep = cast<GetElementPtrInst>(
993           cast<StoreInst>(OtherInst)->getPointerOperand());
994     ClonedGep->andIRFlags(OtherGep);
995   }
996 
997   // Replace uses of Gep with ClonedGep in Repl.
998   Repl->replaceUsesOfWith(Gep, ClonedGep);
999 }
1000 
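// Merge the alignment of I into Repl conservatively: a hoisted load or store
// must not claim a stronger alignment than any instruction it replaces (hence
// std::min), whereas an alloca must be at least as aligned as every alloca it
// replaces (hence std::max).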
1001 void GVNHoist::updateAlignment(Instruction *I, Instruction *Repl) {
1002   if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
1003     ReplacementLoad->setAlignment(
1004         std::min(ReplacementLoad->getAlign(), cast<LoadInst>(I)->getAlign()));
1005     ++NumLoadsRemoved;
1006   } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
1007     ReplacementStore->setAlignment(
1008         std::min(ReplacementStore->getAlign(), cast<StoreInst>(I)->getAlign()));
1009     ++NumStoresRemoved;
1010   } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
1011     ReplacementAlloca->setAlignment(std::max(ReplacementAlloca->getAlign(),
1012                                              cast<AllocaInst>(I)->getAlign()));
1013   } else if (isa<CallInst>(Repl)) {
1014     ++NumCallsRemoved;
1015   }
1016 }
1017 
1018 unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
1019                         MemoryUseOrDef *NewMemAcc) {
1020   unsigned NR = 0;
1021   for (Instruction *I : Candidates) {
1022     if (I != Repl) {
1023       ++NR;
1024       updateAlignment(I, Repl);
1025       if (NewMemAcc) {
1026         // Update the uses of the old MSSA access with NewMemAcc.
1027         MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
1028         OldMA->replaceAllUsesWith(NewMemAcc);
1029         MSSAUpdater->removeMemoryAccess(OldMA);
1030       }
1031 
1032       Repl->andIRFlags(I);
1033       combineKnownMetadata(Repl, I);
1034       I->replaceAllUsesWith(Repl);
1035       // Also invalidate the Alias Analysis cache.
1036       MD->removeInstruction(I);
1037       I->eraseFromParent();
1038     }
1039   }
1040   return NR;
1041 }
1042 
1043 void GVNHoist::raMPHIuw(MemoryUseOrDef *NewMemAcc) {
1044   SmallPtrSet<MemoryPhi *, 4> UsePhis;
1045   for (User *U : NewMemAcc->users())
1046     if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
1047       UsePhis.insert(Phi);
1048 
1049   for (MemoryPhi *Phi : UsePhis) {
1050     auto In = Phi->incoming_values();
1051     if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
1052       Phi->replaceAllUsesWith(NewMemAcc);
1053       MSSAUpdater->removeMemoryAccess(Phi);
1054     }
1055   }
1056 }
1057 
1058 unsigned GVNHoist::removeAndReplace(const SmallVecInsn &Candidates,
1059                                     Instruction *Repl, BasicBlock *DestBB,
1060                                     bool MoveAccess) {
1061   MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl);
1062   if (MoveAccess && NewMemAcc) {
1063     // The definition of this ld/st will not change: ld/st hoisting is
1064     // legal when the ld/st is not moved past its current definition.
1065     MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::BeforeTerminator);
1066   }
1067 
1068   // Replace all other instructions with Repl, using the memory access NewMemAcc.
1069   unsigned NR = rauw(Candidates, Repl, NewMemAcc);
1070 
1071   // Remove MemorySSA phi nodes with the same arguments.
1072   if (NewMemAcc)
1073     raMPHIuw(NewMemAcc);
1074   return NR;
1075 }
1076 
1077 bool GVNHoist::makeGepOperandsAvailable(
1078     Instruction *Repl, BasicBlock *HoistPt,
1079     const SmallVecInsn &InstructionsToHoist) const {
1080   // Check whether the GEP of a ld/st can be synthesized at HoistPt.
1081   GetElementPtrInst *Gep = nullptr;
1082   Instruction *Val = nullptr;
1083   if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
1084     Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
1085   } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
1086     Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
1087     Val = dyn_cast<Instruction>(St->getValueOperand());
1088     // Check that the stored value is available.
1089     if (Val) {
1090       if (isa<GetElementPtrInst>(Val)) {
1091         // Check whether we can compute the GEP at HoistPt.
1092         if (!allGepOperandsAvailable(Val, HoistPt))
1093           return false;
1094       } else if (!DT->dominates(Val->getParent(), HoistPt))
1095         return false;
1096     }
1097   }
1098 
1099   // Check whether we can compute the Gep at HoistPt.
1100   if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
1101     return false;
1102 
1103   makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
1104 
1105   if (Val && isa<GetElementPtrInst>(Val))
1106     makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
1107 
1108   return true;
1109 }
1110 
1111 std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
1112   unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
1113   for (const HoistingPointInfo &HP : HPL) {
1114     // Find out whether we already have one of the instructions in HoistPt,
1115     // in which case we do not have to move it.
1116     BasicBlock *DestBB = HP.first;
1117     const SmallVecInsn &InstructionsToHoist = HP.second;
1118     Instruction *Repl = nullptr;
1119     for (Instruction *I : InstructionsToHoist)
1120       if (I->getParent() == DestBB)
1121         // If there are two instructions in HoistPt to be hoisted in place:
1122         // update Repl to be the first one, such that we can rename the uses
1123         // of the second based on the first.
1124         if (!Repl || firstInBB(I, Repl))
1125           Repl = I;
1126 
1127     // Keep track of whether we moved the instruction so we know whether we
1128     // should move the MemoryAccess.
1129     bool MoveAccess = true;
1130     if (Repl) {
1131       // Repl is already in HoistPt: it remains in place.
1132       assert(allOperandsAvailable(Repl, DestBB) &&
1133              "instruction depends on operands that are not available");
1134       MoveAccess = false;
1135     } else {
1136       // When we do not find Repl in HoistPt, select the first in the list
1137       // and move it to HoistPt.
1138       Repl = InstructionsToHoist.front();
1139 
1140       // We can move Repl in HoistPt only when all operands are available.
1141       // The order in which hoistings are done may influence the availability
1142       // of operands.
1143       if (!allOperandsAvailable(Repl, DestBB)) {
1144         // When HoistingGeps there is nothing more we can do to make the
1145         // operands available: just continue.
1146         if (HoistingGeps)
1147           continue;
1148 
1149         // When not HoistingGeps we need to copy the GEPs.
1150         if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist))
1151           continue;
1152       }
1153 
1154       // Move the instruction to the end of HoistPt.
1155       Instruction *Last = DestBB->getTerminator();
1156       MD->removeInstruction(Repl);
1157       Repl->moveBefore(Last);
1158 
1159       DFSNumber[Repl] = DFSNumber[Last]++;
1160     }
1161 
1162     NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
1163 
1164     if (isa<LoadInst>(Repl))
1165       ++NL;
1166     else if (isa<StoreInst>(Repl))
1167       ++NS;
1168     else if (isa<CallInst>(Repl))
1169       ++NC;
1170     else // Scalar
1171       ++NI;
1172   }
1173 
1174   if (MSSA && VerifyMemorySSA)
1175     MSSA->verifyMemorySSA();
1176 
1177   NumHoisted += NL + NS + NC + NI;
1178   NumRemoved += NR;
1179   NumLoadsHoisted += NL;
1180   NumStoresHoisted += NS;
1181   NumCallsHoisted += NC;
1182   return {NI, NL + NC + NS};
1183 }
1184 
1185 std::pair<unsigned, unsigned> GVNHoist::hoistExpressions(Function &F) {
1186   InsnInfo II;
1187   LoadInfo LI;
1188   StoreInfo SI;
1189   CallInfo CI;
1190   for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
1191     int InstructionNb = 0;
1192     for (Instruction &I1 : *BB) {
1193       // If I1 cannot guarantee progress, subsequent instructions
1194       // in BB cannot be hoisted anyways.
1195       if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
1196         HoistBarrier.insert(BB);
1197         break;
1198       }
1199       // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
1200       // deeper may increase the register pressure and compilation time.
1201       if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
1202         break;
1203 
1204       // Do not value number terminator instructions.
1205       if (I1.isTerminator())
1206         break;
1207 
1208       if (auto *Load = dyn_cast<LoadInst>(&I1))
1209         LI.insert(Load, VN);
1210       else if (auto *Store = dyn_cast<StoreInst>(&I1))
1211         SI.insert(Store, VN);
1212       else if (auto *Call = dyn_cast<CallInst>(&I1)) {
1213         if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
1214           if (isa<DbgInfoIntrinsic>(Intr) ||
1215               Intr->getIntrinsicID() == Intrinsic::assume ||
1216               Intr->getIntrinsicID() == Intrinsic::sideeffect)
1217             continue;
1218         }
1219         if (Call->mayHaveSideEffects())
1220           break;
1221 
1222         if (Call->isConvergent())
1223           break;
1224 
1225         CI.insert(Call, VN);
1226       } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
1227         // Do not hoist scalars past calls that may write to memory because
1228         // that could result in spills later. geps are handled separately.
1229         // TODO: We can relax this for targets like AArch64 as they have more
1230         // registers than X86.
1231         II.insert(&I1, VN);
1232     }
1233   }
1234 
1235   HoistingPointList HPL;
1236   computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
1237   computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
1238   computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
1239   computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
1240   computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
1241   computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
1242   return hoist(HPL);
1243 }
1244 
1245 } // end namespace llvm
1246 
1247 PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
1248   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1249   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1250   AliasAnalysis &AA = AM.getResult<AAManager>(F);
1251   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
1252   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
1253   GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
1254   if (!G.run(F))
1255     return PreservedAnalyses::all();
1256 
1257   PreservedAnalyses PA;
1258   PA.preserve<DominatorTreeAnalysis>();
1259   PA.preserve<MemorySSAAnalysis>();
1260   PA.preserve<GlobalsAA>();
1261   return PA;
1262 }
1263 
1264 char GVNHoistLegacyPass::ID = 0;
1265 
1266 INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
1267                       "Early GVN Hoisting of Expressions", false, false)
1268 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
1269 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1270 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1271 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
1272 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1273 INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
1274                     "Early GVN Hoisting of Expressions", false, false)
1275 
1276 FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
1277