//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///            [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
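///
/// In the example above, a use-based numbering assigns %a1 and %c1 the same
/// number: each is used only by an xor-with-1 whose result feeds the same
/// PHI, so substituting one for the other leaves every later instruction
/// equivalent. (This walkthrough is illustrative commentary only.)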
//
//===----------------------------------------------------------------------===//

36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/Hashing.h"
40 #include "llvm/ADT/None.h"
41 #include "llvm/ADT/Optional.h"
42 #include "llvm/ADT/PostOrderIterator.h"
43 #include "llvm/ADT/STLExtras.h"
44 #include "llvm/ADT/SmallPtrSet.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/Analysis/GlobalsModRef.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/CFG.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/Use.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/InitializePasses.h"
60 #include "llvm/Pass.h"
61 #include "llvm/Support/Allocator.h"
62 #include "llvm/Support/ArrayRecycler.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/raw_ostream.h"
68 #include "llvm/Transforms/Scalar.h"
69 #include "llvm/Transforms/Scalar/GVN.h"
70 #include "llvm/Transforms/Scalar/GVNExpression.h"
71 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
72 #include "llvm/Transforms/Utils/Local.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstddef>
76 #include <cstdint>
77 #include <iterator>
78 #include <utility>
79 
80 using namespace llvm;
81 
82 #define DEBUG_TYPE "gvn-sink"
83 
84 STATISTIC(NumRemoved, "Number of instructions removed");
85 
86 namespace llvm {
87 namespace GVNExpression {
88 
89 LLVM_DUMP_METHOD void Expression::dump() const {
90   print(dbgs());
91   dbgs() << "\n";
92 }
93 
94 } // end namespace GVNExpression
95 } // end namespace llvm
96 
97 namespace {
98 
99 static bool isMemoryInst(const Instruction *I) {
100   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
101          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
102          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
103 }
104 
/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
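///
/// A typical traversal looks like the following sketch, where \c Preds stands
/// for any set of predecessor blocks sharing a common successor:
///   LockstepReverseIterator LRI(Preds);
///   while (LRI.isValid()) {
///     ArrayRef<Instruction *> Row = *LRI; // One instruction per active block.
///     // ... analyze Row ...
///     --LRI;
///   }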
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!llvm::is_contained(Blocks, (*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

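  /// A worked example of the cost formula below (illustrative only): sinking
  /// 3 instructions from both of 2 predecessors while adding 1 PHI beyond the
  /// original count, with no edge split, scores
  /// 3 * (2 - 1) - 1 * 1 - 0 = 2, so the candidate is profitable.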
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that would need to be created if we sank a sequence of instructions. It
/// provides a hash function for efficient equality comparisons.
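///
/// For example (an illustrative sketch), the IR PHI
///   %e = phi i32 [ %a2, %bb1 ], [ %c2, %bb2 ]
/// is modelled as the parallel arrays Blocks = [%bb1, %bb2] and
/// Values = [%a2, %c2], sorted by block so that equivalent PHIs compare equal
/// regardless of incoming-operand order.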
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

254 
255   /// Create a PHI from an array of incoming values and incoming blocks.
256   template <typename VArray, typename BArray>
257   ModelledPHI(const VArray &V, const BArray &B) {
258     llvm::copy(V, std::back_inserter(Values));
259     llvm::copy(B, std::back_inserter(Blocks));
260   }
261 
262   /// Create a PHI from [I[OpNum] for I in Insts].
263   template <typename BArray>
264   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
265     llvm::copy(B, std::back_inserter(Blocks));
266     for (auto *I : Insts)
267       Values.push_back(I->getOperand(OpNum));
268   }
269 
270   /// Restrict the PHI's contents down to only \c NewBlocks.
271   /// \c NewBlocks must be a subset of \c this->Blocks.
272   void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
273     auto BI = Blocks.begin();
274     auto VI = Values.begin();
275     while (BI != Blocks.end()) {
276       assert(VI != Values.end());
277       if (!llvm::is_contained(NewBlocks, *BI)) {
278         BI = Blocks.erase(BI);
279         VI = Values.erase(VI);
280       } else {
281         ++BI;
282         ++VI;
283       }
284     }
285     assert(Blocks.size() == NewBlocks.size());
286   }
287 
288   ArrayRef<Value *> getValues() const { return Values; }
289 
290   bool areAllIncomingValuesSame() const {
291     return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
292   }
293 
294   bool areAllIncomingValuesSameType() const {
295     return llvm::all_of(
296         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
297   }
298 
299   bool areAnyIncomingValuesConstant() const {
300     return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
301   }
302 
  // Hash functor
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

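// A stand-in for an llvm::DenseMapInfo specialization: defining a separate
// template here (whose parameter deliberately shadows the ModelledPHI class
// name) avoids reopening namespace llvm, and the dummy PHIs made by
// createDummy() serve as the empty and tombstone sentinel keys.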
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

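    // Discriminate compares by folding the predicate into the opcode: e.g.,
    // "icmp eq" and "icmp ne" must number differently even though both carry
    // the ICmp opcode.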
    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing
  /// a single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
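  ///
  /// For example (an illustrative sketch), given the block tail
  ///   %x = load i32, ptr %p
  ///   store i32 0, ptr %q
  ///   br label %succ
  /// getMemoryUseOrder of the load is the value number of the store; loads
  /// and read-only calls encountered on the walk are skipped because they
  /// cannot change the memory state the queried instruction observes.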
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try to sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts)
               I->dump();
             dbgs() << " ]\n";);

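  // Count how many of the lockstep instructions map to each value number; the
  // most frequent number identifies the largest group that could be sunk as a
  // single instruction this round.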
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different from NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If the instructions that are going to participate don't all have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates)
               dbgs() << "  " << C << "\n";);

  // Pick the top candidate, as long as its cost is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

////////////////////////////////////////////////////////////////////////////////
// Pass machinery / boilerplate

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

char GVNSinkLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }