1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Analysis/InstructionSimplify.h"
17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/CFG.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/IR/ValueHandle.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Transforms/Utils/SSAUpdater.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <utility>
49 
50 using namespace llvm;
51 using namespace llvm::PatternMatch;
52 
53 #define DEBUG_TYPE "structurizecfg"
54 
55 // The name for newly created blocks.
56 const char FlowBlockName[] = "Flow";
57 
58 namespace {
59 
60 static cl::opt<bool> ForceSkipUniformRegions(
61   "structurizecfg-skip-uniform-regions",
62   cl::Hidden,
63   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
64   cl::init(false));
65 
66 static cl::opt<bool>
67     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
68                           cl::desc("Allow relaxed uniform region checks"),
69                           cl::init(true));
70 
71 // Definition of the complex types used in this pass.
72 
73 using BBValuePair = std::pair<BasicBlock *, Value *>;
74 
75 using RNVector = SmallVector<RegionNode *, 8>;
76 using BBVector = SmallVector<BasicBlock *, 8>;
77 using BranchVector = SmallVector<BranchInst *, 8>;
78 using BBValueVector = SmallVector<BBValuePair, 2>;
79 
80 using BBSet = SmallPtrSet<BasicBlock *, 8>;
81 
82 using PhiMap = MapVector<PHINode *, BBValueVector>;
83 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
84 
85 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
86 using BBPredicates = DenseMap<BasicBlock *, Value *>;
87 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
88 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
89 
90 // A traits type that is intended to be used in graph algorithms. The graph
91 // traits starts at an entry node, and traverses the RegionNodes that are in
92 // the Nodes set.
93 struct SubGraphTraits {
94   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
95   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
96 
97   // This wraps a set of Nodes into the iterator, so we know which edges to
98   // filter out.
99   class WrappedSuccIterator
100       : public iterator_adaptor_base<
101             WrappedSuccIterator, BaseSuccIterator,
102             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
103             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
104     SmallDenseSet<RegionNode *> *Nodes;
105 
106   public:
107     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
108         : iterator_adaptor_base(It), Nodes(Nodes) {}
109 
110     NodeRef operator*() const { return {*I, Nodes}; }
111   };
112 
113   static bool filterAll(const NodeRef &N) { return true; }
114   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
115 
116   using ChildIteratorType =
117       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
118 
119   static NodeRef getEntryNode(Region *R) {
120     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
121   }
122 
123   static NodeRef getEntryNode(NodeRef N) { return N; }
124 
125   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
126     auto *filter = N.second ? &filterSet : &filterAll;
127     return make_filter_range(
128         make_range<WrappedSuccIterator>(
129             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
130             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
131         filter);
132   }
133 
134   static ChildIteratorType child_begin(const NodeRef &N) {
135     return children(N).begin();
136   }
137 
138   static ChildIteratorType child_end(const NodeRef &N) {
139     return children(N).end();
140   }
141 };
142 
143 /// Finds the nearest common dominator of a set of BasicBlocks.
144 ///
145 /// For every BB you add to the set, you can specify whether we "remember" the
146 /// block.  When you get the common dominator, you can also ask whether it's one
147 /// of the blocks we remembered.
148 class NearestCommonDominator {
149   DominatorTree *DT;
150   BasicBlock *Result = nullptr;
151   bool ResultIsRemembered = false;
152 
153   /// Add BB to the resulting dominator.
154   void addBlock(BasicBlock *BB, bool Remember) {
155     if (!Result) {
156       Result = BB;
157       ResultIsRemembered = Remember;
158       return;
159     }
160 
161     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
162     if (NewResult != Result)
163       ResultIsRemembered = false;
164     if (NewResult == BB)
165       ResultIsRemembered |= Remember;
166     Result = NewResult;
167   }
168 
169 public:
170   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
171 
172   void addBlock(BasicBlock *BB) {
173     addBlock(BB, /* Remember = */ false);
174   }
175 
176   void addAndRememberBlock(BasicBlock *BB) {
177     addBlock(BB, /* Remember = */ true);
178   }
179 
180   /// Get the nearest common dominator of all the BBs added via addBlock() and
181   /// addAndRememberBlock().
182   BasicBlock *result() { return Result; }
183 
184   /// Is the BB returned by getResult() one of the blocks we added to the set
185   /// with addAndRememberBlock()?
186   bool resultIsRememberedBlock() { return ResultIsRemembered; }
187 };
188 
189 /// Transforms the control flow graph on one single entry/exit region
190 /// at a time.
191 ///
192 /// After the transform all "If"/"Then"/"Else" style control flow looks like
193 /// this:
194 ///
195 /// \verbatim
196 /// 1
197 /// ||
198 /// | |
199 /// 2 |
200 /// | /
201 /// |/
202 /// 3
203 /// ||   Where:
204 /// | |  1 = "If" block, calculates the condition
205 /// 4 |  2 = "Then" subregion, runs if the condition is true
206 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
207 /// |/   4 = "Else" optional subregion, runs if the condition is false
208 /// 5    5 = "End" block, also rejoins the control flow
209 /// \endverbatim
210 ///
211 /// Control flow is expressed as a branch where the true exit goes into the
212 /// "Then"/"Else" region, while the false exit skips the region
213 /// The condition for the optional "Else" region is expressed as a PHI node.
214 /// The incoming values of the PHI node are true for the "If" edge and false
215 /// for the "Then" edge.
216 ///
217 /// Additionally to that even complicated loops look like this:
218 ///
219 /// \verbatim
220 /// 1
221 /// ||
222 /// | |
223 /// 2 ^  Where:
224 /// | /  1 = "Entry" block
225 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
226 /// 3    3 = "Flow" block, with back edge to entry block
227 /// |
228 /// \endverbatim
229 ///
230 /// The back edge of the "Flow" block is always on the false side of the branch
231 /// while the true side continues the general flow. So the loop condition
232 /// consist of a network of PHI nodes where the true incoming values expresses
233 /// breaks and the false values expresses continue states.
234 
235 class StructurizeCFG {
236   Type *Boolean;
237   ConstantInt *BoolTrue;
238   ConstantInt *BoolFalse;
239   UndefValue *BoolUndef;
240 
241   Function *Func;
242   Region *ParentRegion;
243 
244   LegacyDivergenceAnalysis *DA = nullptr;
245   DominatorTree *DT;
246 
247   SmallVector<RegionNode *, 8> Order;
248   BBSet Visited;
249 
250   SmallVector<WeakVH, 8> AffectedPhis;
251   BBPhiMap DeletedPhis;
252   BB2BBVecMap AddedPhis;
253 
254   PredMap Predicates;
255   BranchVector Conditions;
256 
257   BB2BBMap Loops;
258   PredMap LoopPreds;
259   BranchVector LoopConds;
260 
261   RegionNode *PrevNode;
262 
263   void orderNodes();
264 
265   void analyzeLoops(RegionNode *N);
266 
267   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
268 
269   void gatherPredicates(RegionNode *N);
270 
271   void collectInfos();
272 
273   void insertConditions(bool Loops);
274 
275   void simplifyConditions();
276 
277   void delPhiValues(BasicBlock *From, BasicBlock *To);
278 
279   void addPhiValues(BasicBlock *From, BasicBlock *To);
280 
281   void setPhiValues();
282 
283   void simplifyAffectedPhis();
284 
285   void killTerminator(BasicBlock *BB);
286 
287   void changeExit(RegionNode *Node, BasicBlock *NewExit,
288                   bool IncludeDominator);
289 
290   BasicBlock *getNextFlow(BasicBlock *Dominator);
291 
292   BasicBlock *needPrefix(bool NeedEmpty);
293 
294   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
295 
296   void setPrevNode(BasicBlock *BB);
297 
298   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
299 
300   bool isPredictableTrue(RegionNode *Node);
301 
302   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
303 
304   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
305 
306   void createFlow();
307 
308   void rebuildSSA();
309 
310 public:
311   void init(Region *R);
312   bool run(Region *R, DominatorTree *DT);
313   bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
314 };
315 
316 class StructurizeCFGLegacyPass : public RegionPass {
317   bool SkipUniformRegions;
318 
319 public:
320   static char ID;
321 
322   explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
323       : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
324     if (ForceSkipUniformRegions.getNumOccurrences())
325       SkipUniformRegions = ForceSkipUniformRegions.getValue();
326     initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
327   }
328 
329   bool runOnRegion(Region *R, RGPassManager &RGM) override {
330     StructurizeCFG SCFG;
331     SCFG.init(R);
332     if (SkipUniformRegions) {
333       LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
334       if (SCFG.makeUniformRegion(R, DA))
335         return false;
336     }
337     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338     return SCFG.run(R, DT);
339   }
340 
341   StringRef getPassName() const override { return "Structurize control flow"; }
342 
343   void getAnalysisUsage(AnalysisUsage &AU) const override {
344     if (SkipUniformRegions)
345       AU.addRequired<LegacyDivergenceAnalysis>();
346     AU.addRequiredID(LowerSwitchID);
347     AU.addRequired<DominatorTreeWrapperPass>();
348 
349     AU.addPreserved<DominatorTreeWrapperPass>();
350     RegionPass::getAnalysisUsage(AU);
351   }
352 };
353 
354 } // end anonymous namespace
355 
356 char StructurizeCFGLegacyPass::ID = 0;
357 
358 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
359                       "Structurize the CFG", false, false)
360 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
361 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
362 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
363 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
364 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
365                     "Structurize the CFG", false, false)
366 
367 /// Build up the general order of nodes, by performing a topological sort of the
368 /// parent region's nodes, while ensuring that there is no outer cycle node
369 /// between any two inner cycle nodes.
370 void StructurizeCFG::orderNodes() {
371   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
372                              GraphTraits<Region *>::nodes_end(ParentRegion)));
373   if (Order.empty())
374     return;
375 
376   SmallDenseSet<RegionNode *> Nodes;
377   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
378 
379   // A list of range indices of SCCs in Order, to be processed.
380   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
381   unsigned I = 0, E = Order.size();
382   while (true) {
383     // Run through all the SCCs in the subgraph starting with Entry.
384     for (auto SCCI =
385              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
386                  EntryNode);
387          !SCCI.isAtEnd(); ++SCCI) {
388       auto &SCC = *SCCI;
389 
390       // An SCC up to the size of 2, can be reduced to an entry (the last node),
391       // and a possible additional node. Therefore, it is already in order, and
392       // there is no need to add it to the work-list.
393       unsigned Size = SCC.size();
394       if (Size > 2)
395         WorkList.emplace_back(I, I + Size);
396 
397       // Add the SCC nodes to the Order array.
398       for (auto &N : SCC) {
399         assert(I < E && "SCC size mismatch!");
400         Order[I++] = N.first;
401       }
402     }
403     assert(I == E && "SCC size mismatch!");
404 
405     // If there are no more SCCs to order, then we are done.
406     if (WorkList.empty())
407       break;
408 
409     std::tie(I, E) = WorkList.pop_back_val();
410 
411     // Collect the set of nodes in the SCC's subgraph. These are only the
412     // possible child nodes; we do not add the entry (last node) otherwise we
413     // will have the same exact SCC all over again.
414     Nodes.clear();
415     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
416 
417     // Update the entry node.
418     EntryNode.first = Order[E - 1];
419     EntryNode.second = &Nodes;
420   }
421 }
422 
423 /// Determine the end of the loops
424 void StructurizeCFG::analyzeLoops(RegionNode *N) {
425   if (N->isSubRegion()) {
426     // Test for exit as back edge
427     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
428     if (Visited.count(Exit))
429       Loops[Exit] = N->getEntry();
430 
431   } else {
432     // Test for successors as back edge
433     BasicBlock *BB = N->getNodeAs<BasicBlock>();
434     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
435 
436     for (BasicBlock *Succ : Term->successors())
437       if (Visited.count(Succ))
438         Loops[Succ] = BB;
439   }
440 }
441 
442 /// Build the condition for one edge
443 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
444                                       bool Invert) {
445   Value *Cond = Invert ? BoolFalse : BoolTrue;
446   if (Term->isConditional()) {
447     Cond = Term->getCondition();
448 
449     if (Idx != (unsigned)Invert)
450       Cond = invertCondition(Cond);
451   }
452   return Cond;
453 }
454 
455 /// Analyze the predecessors of each block and build up predicates
456 void StructurizeCFG::gatherPredicates(RegionNode *N) {
457   RegionInfo *RI = ParentRegion->getRegionInfo();
458   BasicBlock *BB = N->getEntry();
459   BBPredicates &Pred = Predicates[BB];
460   BBPredicates &LPred = LoopPreds[BB];
461 
462   for (BasicBlock *P : predecessors(BB)) {
463     // Ignore it if it's a branch from outside into our region entry
464     if (!ParentRegion->contains(P))
465       continue;
466 
467     Region *R = RI->getRegionFor(P);
468     if (R == ParentRegion) {
469       // It's a top level block in our region
470       BranchInst *Term = cast<BranchInst>(P->getTerminator());
471       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
472         BasicBlock *Succ = Term->getSuccessor(i);
473         if (Succ != BB)
474           continue;
475 
476         if (Visited.count(P)) {
477           // Normal forward edge
478           if (Term->isConditional()) {
479             // Try to treat it like an ELSE block
480             BasicBlock *Other = Term->getSuccessor(!i);
481             if (Visited.count(Other) && !Loops.count(Other) &&
482                 !Pred.count(Other) && !Pred.count(P)) {
483 
484               Pred[Other] = BoolFalse;
485               Pred[P] = BoolTrue;
486               continue;
487             }
488           }
489           Pred[P] = buildCondition(Term, i, false);
490         } else {
491           // Back edge
492           LPred[P] = buildCondition(Term, i, true);
493         }
494       }
495     } else {
496       // It's an exit from a sub region
497       while (R->getParent() != ParentRegion)
498         R = R->getParent();
499 
500       // Edge from inside a subregion to its entry, ignore it
501       if (*R == *N)
502         continue;
503 
504       BasicBlock *Entry = R->getEntry();
505       if (Visited.count(Entry))
506         Pred[Entry] = BoolTrue;
507       else
508         LPred[Entry] = BoolFalse;
509     }
510   }
511 }
512 
513 /// Collect various loop and predicate infos
514 void StructurizeCFG::collectInfos() {
515   // Reset predicate
516   Predicates.clear();
517 
518   // and loop infos
519   Loops.clear();
520   LoopPreds.clear();
521 
522   // Reset the visited nodes
523   Visited.clear();
524 
525   for (RegionNode *RN : reverse(Order)) {
526     LLVM_DEBUG(dbgs() << "Visiting: "
527                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
528                       << RN->getEntry()->getName() << "\n");
529 
530     // Analyze all the conditions leading to a node
531     gatherPredicates(RN);
532 
533     // Remember that we've seen this node
534     Visited.insert(RN->getEntry());
535 
536     // Find the last back edges
537     analyzeLoops(RN);
538   }
539 }
540 
541 /// Insert the missing branch conditions
542 void StructurizeCFG::insertConditions(bool Loops) {
543   BranchVector &Conds = Loops ? LoopConds : Conditions;
544   Value *Default = Loops ? BoolTrue : BoolFalse;
545   SSAUpdater PhiInserter;
546 
547   for (BranchInst *Term : Conds) {
548     assert(Term->isConditional());
549 
550     BasicBlock *Parent = Term->getParent();
551     BasicBlock *SuccTrue = Term->getSuccessor(0);
552     BasicBlock *SuccFalse = Term->getSuccessor(1);
553 
554     PhiInserter.Initialize(Boolean, "");
555     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
556     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
557 
558     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
559 
560     NearestCommonDominator Dominator(DT);
561     Dominator.addBlock(Parent);
562 
563     Value *ParentValue = nullptr;
564     for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
565       BasicBlock *BB = BBAndPred.first;
566       Value *Pred = BBAndPred.second;
567 
568       if (BB == Parent) {
569         ParentValue = Pred;
570         break;
571       }
572       PhiInserter.AddAvailableValue(BB, Pred);
573       Dominator.addAndRememberBlock(BB);
574     }
575 
576     if (ParentValue) {
577       Term->setCondition(ParentValue);
578     } else {
579       if (!Dominator.resultIsRememberedBlock())
580         PhiInserter.AddAvailableValue(Dominator.result(), Default);
581 
582       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
583     }
584   }
585 }
586 
587 /// Simplify any inverted conditions that were built by buildConditions.
588 void StructurizeCFG::simplifyConditions() {
589   SmallVector<Instruction *> InstToErase;
590   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
591     auto &Preds = I.second;
592     for (auto &J : Preds) {
593       auto &Cond = J.second;
594       Instruction *Inverted;
595       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
596           !Cond->use_empty()) {
597         if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
598           InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
599           Cond->replaceAllUsesWith(InvertedCmp);
600           InstToErase.push_back(cast<Instruction>(Cond));
601         }
602       }
603     }
604   }
605   for (auto *I : InstToErase)
606     I->eraseFromParent();
607 }
608 
609 /// Remove all PHI values coming from "From" into "To" and remember
610 /// them in DeletedPhis
611 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
612   PhiMap &Map = DeletedPhis[To];
613   for (PHINode &Phi : To->phis()) {
614     bool Recorded = false;
615     while (Phi.getBasicBlockIndex(From) != -1) {
616       Value *Deleted = Phi.removeIncomingValue(From, false);
617       Map[&Phi].push_back(std::make_pair(From, Deleted));
618       if (!Recorded) {
619         AffectedPhis.push_back(&Phi);
620         Recorded = true;
621       }
622     }
623   }
624 }
625 
626 /// Add a dummy PHI value as soon as we knew the new predecessor
627 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
628   for (PHINode &Phi : To->phis()) {
629     Value *Undef = UndefValue::get(Phi.getType());
630     Phi.addIncoming(Undef, From);
631   }
632   AddedPhis[To].push_back(From);
633 }
634 
635 /// Add the real PHI value as soon as everything is set up
636 void StructurizeCFG::setPhiValues() {
637   SmallVector<PHINode *, 8> InsertedPhis;
638   SSAUpdater Updater(&InsertedPhis);
639   for (const auto &AddedPhi : AddedPhis) {
640     BasicBlock *To = AddedPhi.first;
641     const BBVector &From = AddedPhi.second;
642 
643     if (!DeletedPhis.count(To))
644       continue;
645 
646     PhiMap &Map = DeletedPhis[To];
647     for (const auto &PI : Map) {
648       PHINode *Phi = PI.first;
649       Value *Undef = UndefValue::get(Phi->getType());
650       Updater.Initialize(Phi->getType(), "");
651       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
652       Updater.AddAvailableValue(To, Undef);
653 
654       NearestCommonDominator Dominator(DT);
655       Dominator.addBlock(To);
656       for (const auto &VI : PI.second) {
657         Updater.AddAvailableValue(VI.first, VI.second);
658         Dominator.addAndRememberBlock(VI.first);
659       }
660 
661       if (!Dominator.resultIsRememberedBlock())
662         Updater.AddAvailableValue(Dominator.result(), Undef);
663 
664       for (BasicBlock *FI : From)
665         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
666       AffectedPhis.push_back(Phi);
667     }
668 
669     DeletedPhis.erase(To);
670   }
671   assert(DeletedPhis.empty());
672 
673   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
674 }
675 
676 void StructurizeCFG::simplifyAffectedPhis() {
677   bool Changed;
678   do {
679     Changed = false;
680     SimplifyQuery Q(Func->getParent()->getDataLayout());
681     Q.DT = DT;
682     for (WeakVH VH : AffectedPhis) {
683       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
684         if (auto NewValue = simplifyInstruction(Phi, Q)) {
685           Phi->replaceAllUsesWith(NewValue);
686           Phi->eraseFromParent();
687           Changed = true;
688         }
689       }
690     }
691   } while (Changed);
692 }
693 
694 /// Remove phi values from all successors and then remove the terminator.
695 void StructurizeCFG::killTerminator(BasicBlock *BB) {
696   Instruction *Term = BB->getTerminator();
697   if (!Term)
698     return;
699 
700   for (BasicBlock *Succ : successors(BB))
701     delPhiValues(BB, Succ);
702 
703   if (DA)
704     DA->removeValue(Term);
705   Term->eraseFromParent();
706 }
707 
708 /// Let node exit(s) point to NewExit
709 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
710                                 bool IncludeDominator) {
711   if (Node->isSubRegion()) {
712     Region *SubRegion = Node->getNodeAs<Region>();
713     BasicBlock *OldExit = SubRegion->getExit();
714     BasicBlock *Dominator = nullptr;
715 
716     // Find all the edges from the sub region to the exit.
717     // We use make_early_inc_range here because we modify BB's terminator.
718     for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
719       if (!SubRegion->contains(BB))
720         continue;
721 
722       // Modify the edges to point to the new exit
723       delPhiValues(BB, OldExit);
724       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
725       addPhiValues(BB, NewExit);
726 
727       // Find the new dominator (if requested)
728       if (IncludeDominator) {
729         if (!Dominator)
730           Dominator = BB;
731         else
732           Dominator = DT->findNearestCommonDominator(Dominator, BB);
733       }
734     }
735 
736     // Change the dominator (if requested)
737     if (Dominator)
738       DT->changeImmediateDominator(NewExit, Dominator);
739 
740     // Update the region info
741     SubRegion->replaceExit(NewExit);
742   } else {
743     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
744     killTerminator(BB);
745     BranchInst::Create(NewExit, BB);
746     addPhiValues(BB, NewExit);
747     if (IncludeDominator)
748       DT->changeImmediateDominator(NewExit, BB);
749   }
750 }
751 
752 /// Create a new flow node and update dominator tree and region info
753 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
754   LLVMContext &Context = Func->getContext();
755   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
756                        Order.back()->getEntry();
757   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
758                                         Func, Insert);
759   DT->addNewBlock(Flow, Dominator);
760   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
761   return Flow;
762 }
763 
764 /// Create a new or reuse the previous node as flow node
765 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
766   BasicBlock *Entry = PrevNode->getEntry();
767 
768   if (!PrevNode->isSubRegion()) {
769     killTerminator(Entry);
770     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
771       return Entry;
772   }
773 
774   // create a new flow node
775   BasicBlock *Flow = getNextFlow(Entry);
776 
777   // and wire it up
778   changeExit(PrevNode, Flow, true);
779   PrevNode = ParentRegion->getBBNode(Flow);
780   return Flow;
781 }
782 
783 /// Returns the region exit if possible, otherwise just a new flow node
784 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
785                                         bool ExitUseAllowed) {
786   if (!Order.empty() || !ExitUseAllowed)
787     return getNextFlow(Flow);
788 
789   BasicBlock *Exit = ParentRegion->getExit();
790   DT->changeImmediateDominator(Exit, Flow);
791   addPhiValues(Flow, Exit);
792   return Exit;
793 }
794 
795 /// Set the previous node
796 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
797   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
798                                         : nullptr;
799 }
800 
801 /// Does BB dominate all the predicates of Node?
802 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
803   BBPredicates &Preds = Predicates[Node->getEntry()];
804   return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
805     return DT->dominates(BB, Pred.first);
806   });
807 }
808 
809 /// Can we predict that this node will always be called?
810 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
811   BBPredicates &Preds = Predicates[Node->getEntry()];
812   bool Dominated = false;
813 
814   // Regionentry is always true
815   if (!PrevNode)
816     return true;
817 
818   for (std::pair<BasicBlock*, Value*> Pred : Preds) {
819     BasicBlock *BB = Pred.first;
820     Value *V = Pred.second;
821 
822     if (V != BoolTrue)
823       return false;
824 
825     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
826       Dominated = true;
827   }
828 
829   // TODO: The dominator check is too strict
830   return Dominated;
831 }
832 
833 /// Take one node from the order vector and wire it up
834 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
835                               BasicBlock *LoopEnd) {
836   RegionNode *Node = Order.pop_back_val();
837   Visited.insert(Node->getEntry());
838 
839   if (isPredictableTrue(Node)) {
840     // Just a linear flow
841     if (PrevNode) {
842       changeExit(PrevNode, Node->getEntry(), true);
843     }
844     PrevNode = Node;
845   } else {
846     // Insert extra prefix node (or reuse last one)
847     BasicBlock *Flow = needPrefix(false);
848 
849     // Insert extra postfix node (or use exit instead)
850     BasicBlock *Entry = Node->getEntry();
851     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
852 
853     // let it point to entry and next block
854     Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
855     addPhiValues(Flow, Entry);
856     DT->changeImmediateDominator(Entry, Flow);
857 
858     PrevNode = Node;
859     while (!Order.empty() && !Visited.count(LoopEnd) &&
860            dominatesPredicates(Entry, Order.back())) {
861       handleLoops(false, LoopEnd);
862     }
863 
864     changeExit(PrevNode, Next, false);
865     setPrevNode(Next);
866   }
867 }
868 
869 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
870                                  BasicBlock *LoopEnd) {
871   RegionNode *Node = Order.back();
872   BasicBlock *LoopStart = Node->getEntry();
873 
874   if (!Loops.count(LoopStart)) {
875     wireFlow(ExitUseAllowed, LoopEnd);
876     return;
877   }
878 
879   if (!isPredictableTrue(Node))
880     LoopStart = needPrefix(true);
881 
882   LoopEnd = Loops[Node->getEntry()];
883   wireFlow(false, LoopEnd);
884   while (!Visited.count(LoopEnd)) {
885     handleLoops(false, LoopEnd);
886   }
887 
888   // If the start of the loop is the entry block, we can't branch to it so
889   // insert a new dummy entry block.
890   Function *LoopFunc = LoopStart->getParent();
891   if (LoopStart == &LoopFunc->getEntryBlock()) {
892     LoopStart->setName("entry.orig");
893 
894     BasicBlock *NewEntry =
895       BasicBlock::Create(LoopStart->getContext(),
896                          "entry",
897                          LoopFunc,
898                          LoopStart);
899     BranchInst::Create(LoopStart, NewEntry);
900     DT->setNewRoot(NewEntry);
901   }
902 
903   // Create an extra loop end node
904   LoopEnd = needPrefix(false);
905   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
906   LoopConds.push_back(BranchInst::Create(Next, LoopStart,
907                                          BoolUndef, LoopEnd));
908   addPhiValues(LoopEnd, LoopStart);
909   setPrevNode(Next);
910 }
911 
912 /// After this function control flow looks like it should be, but
913 /// branches and PHI nodes only have undefined conditions.
914 void StructurizeCFG::createFlow() {
915   BasicBlock *Exit = ParentRegion->getExit();
916   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
917 
918   AffectedPhis.clear();
919   DeletedPhis.clear();
920   AddedPhis.clear();
921   Conditions.clear();
922   LoopConds.clear();
923 
924   PrevNode = nullptr;
925   Visited.clear();
926 
927   while (!Order.empty()) {
928     handleLoops(EntryDominatesExit, nullptr);
929   }
930 
931   if (PrevNode)
932     changeExit(PrevNode, Exit, EntryDominatesExit);
933   else
934     assert(EntryDominatesExit);
935 }
936 
937 /// Handle a rare case where the disintegrated nodes instructions
938 /// no longer dominate all their uses. Not sure if this is really necessary
939 void StructurizeCFG::rebuildSSA() {
940   SSAUpdater Updater;
941   for (BasicBlock *BB : ParentRegion->blocks())
942     for (Instruction &I : *BB) {
943       bool Initialized = false;
944       // We may modify the use list as we iterate over it, so we use
945       // make_early_inc_range.
946       for (Use &U : llvm::make_early_inc_range(I.uses())) {
947         Instruction *User = cast<Instruction>(U.getUser());
948         if (User->getParent() == BB) {
949           continue;
950         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
951           if (UserPN->getIncomingBlock(U) == BB)
952             continue;
953         }
954 
955         if (DT->dominates(&I, User))
956           continue;
957 
958         if (!Initialized) {
959           Value *Undef = UndefValue::get(I.getType());
960           Updater.Initialize(I.getType(), "");
961           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
962           Updater.AddAvailableValue(BB, &I);
963           Initialized = true;
964         }
965         Updater.RewriteUseAfterInsertions(U);
966       }
967     }
968 }
969 
970 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
971                                    const LegacyDivergenceAnalysis &DA) {
972   // Bool for if all sub-regions are uniform.
973   bool SubRegionsAreUniform = true;
974   // Count of how many direct children are conditional.
975   unsigned ConditionalDirectChildren = 0;
976 
977   for (auto E : R->elements()) {
978     if (!E->isSubRegion()) {
979       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
980       if (!Br || !Br->isConditional())
981         continue;
982 
983       if (!DA.isUniform(Br))
984         return false;
985 
986       // One of our direct children is conditional.
987       ConditionalDirectChildren++;
988 
989       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
990                         << " has uniform terminator\n");
991     } else {
992       // Explicitly refuse to treat regions as uniform if they have non-uniform
993       // subregions. We cannot rely on DivergenceAnalysis for branches in
994       // subregions because those branches may have been removed and re-created,
995       // so we look for our metadata instead.
996       //
997       // Warning: It would be nice to treat regions as uniform based only on
998       // their direct child basic blocks' terminators, regardless of whether
999       // subregions are uniform or not. However, this requires a very careful
1000       // look at SIAnnotateControlFlow to make sure nothing breaks there.
1001       for (auto BB : E->getNodeAs<Region>()->blocks()) {
1002         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1003         if (!Br || !Br->isConditional())
1004           continue;
1005 
1006         if (!Br->getMetadata(UniformMDKindID)) {
1007           // Early exit if we cannot have relaxed uniform regions.
1008           if (!RelaxedUniformRegions)
1009             return false;
1010 
1011           SubRegionsAreUniform = false;
1012           break;
1013         }
1014       }
1015     }
1016   }
1017 
1018   // Our region is uniform if:
1019   // 1. All conditional branches that are direct children are uniform (checked
1020   // above).
1021   // 2. And either:
1022   //   a. All sub-regions are uniform.
1023   //   b. There is one or less conditional branches among the direct children.
1024   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1025 }
1026 
1027 void StructurizeCFG::init(Region *R) {
1028   LLVMContext &Context = R->getEntry()->getContext();
1029 
1030   Boolean = Type::getInt1Ty(Context);
1031   BoolTrue = ConstantInt::getTrue(Context);
1032   BoolFalse = ConstantInt::getFalse(Context);
1033   BoolUndef = UndefValue::get(Boolean);
1034 
1035   this->DA = nullptr;
1036 }
1037 
1038 bool StructurizeCFG::makeUniformRegion(Region *R,
1039                                        LegacyDivergenceAnalysis *DA) {
1040   if (R->isTopLevelRegion())
1041     return false;
1042 
1043   this->DA = DA;
1044   // TODO: We could probably be smarter here with how we handle sub-regions.
1045   // We currently rely on the fact that metadata is set by earlier invocations
1046   // of the pass on sub-regions, and that this metadata doesn't get lost --
1047   // but we shouldn't rely on metadata for correctness!
1048   unsigned UniformMDKindID =
1049       R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1050 
1051   if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
1052     LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1053                       << '\n');
1054 
1055     // Mark all direct child block terminators as having been treated as
1056     // uniform. To account for a possible future in which non-uniform
1057     // sub-regions are treated more cleverly, indirect children are not
1058     // marked as uniform.
1059     MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1060     for (RegionNode *E : R->elements()) {
1061       if (E->isSubRegion())
1062         continue;
1063 
1064       if (Instruction *Term = E->getEntry()->getTerminator())
1065         Term->setMetadata(UniformMDKindID, MD);
1066     }
1067 
1068     return true;
1069   }
1070   return false;
1071 }
1072 
1073 /// Run the transformation for each region found
1074 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1075   if (R->isTopLevelRegion())
1076     return false;
1077 
1078   this->DT = DT;
1079 
1080   Func = R->getEntry()->getParent();
1081   ParentRegion = R;
1082 
1083   orderNodes();
1084   collectInfos();
1085   createFlow();
1086   insertConditions(false);
1087   insertConditions(true);
1088   setPhiValues();
1089   simplifyConditions();
1090   simplifyAffectedPhis();
1091   rebuildSSA();
1092 
1093   // Cleanup
1094   Order.clear();
1095   Visited.clear();
1096   DeletedPhis.clear();
1097   AddedPhis.clear();
1098   Predicates.clear();
1099   Conditions.clear();
1100   Loops.clear();
1101   LoopPreds.clear();
1102   LoopConds.clear();
1103 
1104   return true;
1105 }
1106 
1107 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1108   return new StructurizeCFGLegacyPass(SkipUniformRegions);
1109 }
1110 
1111 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1112   Regions.push_back(&R);
1113   for (const auto &E : R)
1114     addRegionIntoQueue(*E, Regions);
1115 }
1116 
1117 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1118                                           FunctionAnalysisManager &AM) {
1119 
1120   bool Changed = false;
1121   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1122   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1123   std::vector<Region *> Regions;
1124   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1125   while (!Regions.empty()) {
1126     Region *R = Regions.back();
1127     StructurizeCFG SCFG;
1128     SCFG.init(R);
1129     Changed |= SCFG.run(R, DT);
1130     Regions.pop_back();
1131   }
1132   if (!Changed)
1133     return PreservedAnalyses::all();
1134   PreservedAnalyses PA;
1135   PA.preserve<DominatorTreeAnalysis>();
1136   return PA;
1137 }
1138