1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/MapVector.h"
12 #include "llvm/ADT/SCCIterator.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallPtrSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Analysis/InstructionSimplify.h"
17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18 #include "llvm/Analysis/RegionInfo.h"
19 #include "llvm/Analysis/RegionIterator.h"
20 #include "llvm/Analysis/RegionPass.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/CFG.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/IR/ValueHandle.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Transforms/Utils/SSAUpdater.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <utility>
49 
50 using namespace llvm;
51 using namespace llvm::PatternMatch;
52 
53 #define DEBUG_TYPE "structurizecfg"
54 
55 // The name for newly created blocks.
56 const char FlowBlockName[] = "Flow";
57 
58 namespace {
59 
60 static cl::opt<bool> ForceSkipUniformRegions(
61   "structurizecfg-skip-uniform-regions",
62   cl::Hidden,
63   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
64   cl::init(false));
65 
66 static cl::opt<bool>
67     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
68                           cl::desc("Allow relaxed uniform region checks"),
69                           cl::init(true));
70 
71 static cl::opt<unsigned>
72     ReorderNodeSize("structurizecfg-node-reorder-size",
73                      cl::desc("Limit region size for reordering nodes"),
74                      cl::init(100), cl::Hidden);
75 
76 // Definition of the complex types used in this pass.
77 
78 using BBValuePair = std::pair<BasicBlock *, Value *>;
79 
80 using RNVector = SmallVector<RegionNode *, 8>;
81 using BBVector = SmallVector<BasicBlock *, 8>;
82 using BranchVector = SmallVector<BranchInst *, 8>;
83 using BBValueVector = SmallVector<BBValuePair, 2>;
84 
85 using BBSet = SmallPtrSet<BasicBlock *, 8>;
86 
87 using PhiMap = MapVector<PHINode *, BBValueVector>;
88 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
89 
90 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
91 using BBPredicates = DenseMap<BasicBlock *, Value *>;
92 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
93 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
94 
95 // A traits type that is intended to be used in graph algorithms. The graph
96 // traits starts at an entry node, and traverses the RegionNodes that are in
97 // the Nodes set.
98 struct SubGraphTraits {
99   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
100   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
101 
102   // This wraps a set of Nodes into the iterator, so we know which edges to
103   // filter out.
104   class WrappedSuccIterator
105       : public iterator_adaptor_base<
106             WrappedSuccIterator, BaseSuccIterator,
107             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
108             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
109     SmallDenseSet<RegionNode *> *Nodes;
110 
111   public:
112     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
113         : iterator_adaptor_base(It), Nodes(Nodes) {}
114 
115     NodeRef operator*() const { return {*I, Nodes}; }
116   };
117 
118   static bool filterAll(const NodeRef &N) { return true; }
119   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
120 
121   using ChildIteratorType =
122       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
123 
124   static NodeRef getEntryNode(Region *R) {
125     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
126   }
127 
128   static NodeRef getEntryNode(NodeRef N) { return N; }
129 
130   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
131     auto *filter = N.second ? &filterSet : &filterAll;
132     return make_filter_range(
133         make_range<WrappedSuccIterator>(
134             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
135             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
136         filter);
137   }
138 
139   static ChildIteratorType child_begin(const NodeRef &N) {
140     return children(N).begin();
141   }
142 
143   static ChildIteratorType child_end(const NodeRef &N) {
144     return children(N).end();
145   }
146 };
147 
148 /// Finds the nearest common dominator of a set of BasicBlocks.
149 ///
150 /// For every BB you add to the set, you can specify whether we "remember" the
151 /// block.  When you get the common dominator, you can also ask whether it's one
152 /// of the blocks we remembered.
153 class NearestCommonDominator {
154   DominatorTree *DT;
155   BasicBlock *Result = nullptr;
156   bool ResultIsRemembered = false;
157 
158   /// Add BB to the resulting dominator.
159   void addBlock(BasicBlock *BB, bool Remember) {
160     if (!Result) {
161       Result = BB;
162       ResultIsRemembered = Remember;
163       return;
164     }
165 
166     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
167     if (NewResult != Result)
168       ResultIsRemembered = false;
169     if (NewResult == BB)
170       ResultIsRemembered |= Remember;
171     Result = NewResult;
172   }
173 
174 public:
175   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
176 
177   void addBlock(BasicBlock *BB) {
178     addBlock(BB, /* Remember = */ false);
179   }
180 
181   void addAndRememberBlock(BasicBlock *BB) {
182     addBlock(BB, /* Remember = */ true);
183   }
184 
185   /// Get the nearest common dominator of all the BBs added via addBlock() and
186   /// addAndRememberBlock().
187   BasicBlock *result() { return Result; }
188 
189   /// Is the BB returned by getResult() one of the blocks we added to the set
190   /// with addAndRememberBlock()?
191   bool resultIsRememberedBlock() { return ResultIsRemembered; }
192 };
193 
194 /// Transforms the control flow graph on one single entry/exit region
195 /// at a time.
196 ///
197 /// After the transform all "If"/"Then"/"Else" style control flow looks like
198 /// this:
199 ///
200 /// \verbatim
201 /// 1
202 /// ||
203 /// | |
204 /// 2 |
205 /// | /
206 /// |/
207 /// 3
208 /// ||   Where:
209 /// | |  1 = "If" block, calculates the condition
210 /// 4 |  2 = "Then" subregion, runs if the condition is true
211 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
212 /// |/   4 = "Else" optional subregion, runs if the condition is false
213 /// 5    5 = "End" block, also rejoins the control flow
214 /// \endverbatim
215 ///
216 /// Control flow is expressed as a branch where the true exit goes into the
217 /// "Then"/"Else" region, while the false exit skips the region
218 /// The condition for the optional "Else" region is expressed as a PHI node.
219 /// The incoming values of the PHI node are true for the "If" edge and false
220 /// for the "Then" edge.
221 ///
222 /// Additionally to that even complicated loops look like this:
223 ///
224 /// \verbatim
225 /// 1
226 /// ||
227 /// | |
228 /// 2 ^  Where:
229 /// | /  1 = "Entry" block
230 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
231 /// 3    3 = "Flow" block, with back edge to entry block
232 /// |
233 /// \endverbatim
234 ///
235 /// The back edge of the "Flow" block is always on the false side of the branch
236 /// while the true side continues the general flow. So the loop condition
237 /// consist of a network of PHI nodes where the true incoming values expresses
238 /// breaks and the false values expresses continue states.
239 
240 class StructurizeCFG {
241   Type *Boolean;
242   ConstantInt *BoolTrue;
243   ConstantInt *BoolFalse;
244   UndefValue *BoolUndef;
245 
246   Function *Func;
247   Region *ParentRegion;
248 
249   LegacyDivergenceAnalysis *DA = nullptr;
250   DominatorTree *DT;
251 
252   SmallVector<RegionNode *, 8> Order;
253   BBSet Visited;
254 
255   SmallVector<WeakVH, 8> AffectedPhis;
256   BBPhiMap DeletedPhis;
257   BB2BBVecMap AddedPhis;
258 
259   PredMap Predicates;
260   BranchVector Conditions;
261 
262   BB2BBMap Loops;
263   PredMap LoopPreds;
264   BranchVector LoopConds;
265 
266   RegionNode *PrevNode;
267 
268   void orderNodes();
269 
270   void reorderNodes();
271 
272   void analyzeLoops(RegionNode *N);
273 
274   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
275 
276   void gatherPredicates(RegionNode *N);
277 
278   void collectInfos();
279 
280   void insertConditions(bool Loops);
281 
282   void simplifyConditions();
283 
284   void delPhiValues(BasicBlock *From, BasicBlock *To);
285 
286   void addPhiValues(BasicBlock *From, BasicBlock *To);
287 
288   void setPhiValues();
289 
290   void simplifyAffectedPhis();
291 
292   void killTerminator(BasicBlock *BB);
293 
294   void changeExit(RegionNode *Node, BasicBlock *NewExit,
295                   bool IncludeDominator);
296 
297   BasicBlock *getNextFlow(BasicBlock *Dominator);
298 
299   BasicBlock *needPrefix(bool NeedEmpty);
300 
301   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
302 
303   void setPrevNode(BasicBlock *BB);
304 
305   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
306 
307   bool isPredictableTrue(RegionNode *Node);
308 
309   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
310 
311   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
312 
313   void createFlow();
314 
315   void rebuildSSA();
316 
317 public:
318   void init(Region *R);
319   bool run(Region *R, DominatorTree *DT);
320   bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
321 };
322 
323 class StructurizeCFGLegacyPass : public RegionPass {
324   bool SkipUniformRegions;
325 
326 public:
327   static char ID;
328 
329   explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
330       : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
331     if (ForceSkipUniformRegions.getNumOccurrences())
332       SkipUniformRegions = ForceSkipUniformRegions.getValue();
333     initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
334   }
335 
336   bool runOnRegion(Region *R, RGPassManager &RGM) override {
337     StructurizeCFG SCFG;
338     SCFG.init(R);
339     if (SkipUniformRegions) {
340       LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
341       if (SCFG.makeUniformRegion(R, DA))
342         return false;
343     }
344     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
345     return SCFG.run(R, DT);
346   }
347 
348   StringRef getPassName() const override { return "Structurize control flow"; }
349 
350   void getAnalysisUsage(AnalysisUsage &AU) const override {
351     if (SkipUniformRegions)
352       AU.addRequired<LegacyDivergenceAnalysis>();
353     AU.addRequiredID(LowerSwitchID);
354     AU.addRequired<DominatorTreeWrapperPass>();
355 
356     AU.addPreserved<DominatorTreeWrapperPass>();
357     RegionPass::getAnalysisUsage(AU);
358   }
359 };
360 
361 } // end anonymous namespace
362 
363 char StructurizeCFGLegacyPass::ID = 0;
364 
365 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
366                       "Structurize the CFG", false, false)
367 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
368 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
369 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
370 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
371 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
372                     "Structurize the CFG", false, false)
373 
374 /// Build up the general order of nodes, by performing a topological sort of the
375 /// parent region's nodes, while ensuring that there is no outer cycle node
376 /// between any two inner cycle nodes.
377 void StructurizeCFG::orderNodes() {
378   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
379                              GraphTraits<Region *>::nodes_end(ParentRegion)));
380   if (Order.empty())
381     return;
382 
383   SmallDenseSet<RegionNode *> Nodes;
384   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
385 
386   // A list of range indices of SCCs in Order, to be processed.
387   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
388   unsigned I = 0, E = Order.size();
389   while (true) {
390     // Run through all the SCCs in the subgraph starting with Entry.
391     for (auto SCCI =
392              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
393                  EntryNode);
394          !SCCI.isAtEnd(); ++SCCI) {
395       auto &SCC = *SCCI;
396 
397       // An SCC up to the size of 2, can be reduced to an entry (the last node),
398       // and a possible additional node. Therefore, it is already in order, and
399       // there is no need to add it to the work-list.
400       unsigned Size = SCC.size();
401       if (Size > 2)
402         WorkList.emplace_back(I, I + Size);
403 
404       // Add the SCC nodes to the Order array.
405       for (auto &N : SCC) {
406         assert(I < E && "SCC size mismatch!");
407         Order[I++] = N.first;
408       }
409     }
410     assert(I == E && "SCC size mismatch!");
411 
412     // If there are no more SCCs to order, then we are done.
413     if (WorkList.empty())
414       break;
415 
416     std::tie(I, E) = WorkList.pop_back_val();
417 
418     // Collect the set of nodes in the SCC's subgraph. These are only the
419     // possible child nodes; we do not add the entry (last node) otherwise we
420     // will have the same exact SCC all over again.
421     Nodes.clear();
422     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
423 
424     // Update the entry node.
425     EntryNode.first = Order[E - 1];
426     EntryNode.second = &Nodes;
427   }
428 }
429 
430 /// Change the node ordering to decrease the range of live values, especially
431 /// the values that capture the control flow path for branches. We do this
432 /// by moving blocks with a single predecessor and successor to appear after
433 /// predecessor. The motivation is to move some loop exit blocks into a loop.
434 /// In cases where a loop has a large number of exit blocks, this reduces the
435 /// amount of values needed across the loop boundary.
436 void StructurizeCFG::reorderNodes() {
437   SmallVector<RegionNode *, 8> NewOrder;
438   DenseMap<BasicBlock *, unsigned> MoveTo;
439   BitVector Moved(Order.size());
440 
441   // The benefits of reordering nodes occurs for large regions.
442   if (Order.size() <= ReorderNodeSize)
443     return;
444 
445   // The algorithm works with two passes over Order. The first pass identifies
446   // the blocks to move and the position to move them to. The second pass
447   // creates the new order based upon this information. We move blocks with
448   // a single predecessor and successor. If there are multiple candidates then
449   // maintain the original order.
450   BBSet Seen;
451   for (int I = Order.size() - 1; I >= 0; --I) {
452     auto *BB = Order[I]->getEntry();
453     Seen.insert(BB);
454     auto *Pred = BB->getSinglePredecessor();
455     auto *Succ = BB->getSingleSuccessor();
456     // Consider only those basic blocks that have a predecessor in Order and a
457     // successor that exits the region. The region may contain subregions that
458     // have been structurized and are not included in Order.
459     if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() &&
460         !MoveTo.count(Pred)) {
461       MoveTo[Pred] = I;
462       Moved.set(I);
463     }
464   }
465 
466   // If no blocks have been moved then the original order is good.
467   if (!Moved.count())
468     return;
469 
470   for (size_t I = 0, E = Order.size(); I < E; ++I) {
471     auto *BB = Order[I]->getEntry();
472     if (MoveTo.count(BB))
473       NewOrder.push_back(Order[MoveTo[BB]]);
474     if (!Moved[I])
475       NewOrder.push_back(Order[I]);
476   }
477 
478   Order.assign(NewOrder);
479 }
480 
481 /// Determine the end of the loops
482 void StructurizeCFG::analyzeLoops(RegionNode *N) {
483   if (N->isSubRegion()) {
484     // Test for exit as back edge
485     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
486     if (Visited.count(Exit))
487       Loops[Exit] = N->getEntry();
488 
489   } else {
490     // Test for successors as back edge
491     BasicBlock *BB = N->getNodeAs<BasicBlock>();
492     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
493 
494     for (BasicBlock *Succ : Term->successors())
495       if (Visited.count(Succ))
496         Loops[Succ] = BB;
497   }
498 }
499 
500 /// Build the condition for one edge
501 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
502                                       bool Invert) {
503   Value *Cond = Invert ? BoolFalse : BoolTrue;
504   if (Term->isConditional()) {
505     Cond = Term->getCondition();
506 
507     if (Idx != (unsigned)Invert)
508       Cond = invertCondition(Cond);
509   }
510   return Cond;
511 }
512 
513 /// Analyze the predecessors of each block and build up predicates
514 void StructurizeCFG::gatherPredicates(RegionNode *N) {
515   RegionInfo *RI = ParentRegion->getRegionInfo();
516   BasicBlock *BB = N->getEntry();
517   BBPredicates &Pred = Predicates[BB];
518   BBPredicates &LPred = LoopPreds[BB];
519 
520   for (BasicBlock *P : predecessors(BB)) {
521     // Ignore it if it's a branch from outside into our region entry
522     if (!ParentRegion->contains(P))
523       continue;
524 
525     Region *R = RI->getRegionFor(P);
526     if (R == ParentRegion) {
527       // It's a top level block in our region
528       BranchInst *Term = cast<BranchInst>(P->getTerminator());
529       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
530         BasicBlock *Succ = Term->getSuccessor(i);
531         if (Succ != BB)
532           continue;
533 
534         if (Visited.count(P)) {
535           // Normal forward edge
536           if (Term->isConditional()) {
537             // Try to treat it like an ELSE block
538             BasicBlock *Other = Term->getSuccessor(!i);
539             if (Visited.count(Other) && !Loops.count(Other) &&
540                 !Pred.count(Other) && !Pred.count(P)) {
541 
542               Pred[Other] = BoolFalse;
543               Pred[P] = BoolTrue;
544               continue;
545             }
546           }
547           Pred[P] = buildCondition(Term, i, false);
548         } else {
549           // Back edge
550           LPred[P] = buildCondition(Term, i, true);
551         }
552       }
553     } else {
554       // It's an exit from a sub region
555       while (R->getParent() != ParentRegion)
556         R = R->getParent();
557 
558       // Edge from inside a subregion to its entry, ignore it
559       if (*R == *N)
560         continue;
561 
562       BasicBlock *Entry = R->getEntry();
563       if (Visited.count(Entry))
564         Pred[Entry] = BoolTrue;
565       else
566         LPred[Entry] = BoolFalse;
567     }
568   }
569 }
570 
571 /// Collect various loop and predicate infos
572 void StructurizeCFG::collectInfos() {
573   // Reset predicate
574   Predicates.clear();
575 
576   // and loop infos
577   Loops.clear();
578   LoopPreds.clear();
579 
580   // Reset the visited nodes
581   Visited.clear();
582 
583   for (RegionNode *RN : reverse(Order)) {
584     LLVM_DEBUG(dbgs() << "Visiting: "
585                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
586                       << RN->getEntry()->getName() << "\n");
587 
588     // Analyze all the conditions leading to a node
589     gatherPredicates(RN);
590 
591     // Remember that we've seen this node
592     Visited.insert(RN->getEntry());
593 
594     // Find the last back edges
595     analyzeLoops(RN);
596   }
597 }
598 
599 /// Insert the missing branch conditions
600 void StructurizeCFG::insertConditions(bool Loops) {
601   BranchVector &Conds = Loops ? LoopConds : Conditions;
602   Value *Default = Loops ? BoolTrue : BoolFalse;
603   SSAUpdater PhiInserter;
604 
605   for (BranchInst *Term : Conds) {
606     assert(Term->isConditional());
607 
608     BasicBlock *Parent = Term->getParent();
609     BasicBlock *SuccTrue = Term->getSuccessor(0);
610     BasicBlock *SuccFalse = Term->getSuccessor(1);
611 
612     PhiInserter.Initialize(Boolean, "");
613     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
614     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
615 
616     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
617 
618     NearestCommonDominator Dominator(DT);
619     Dominator.addBlock(Parent);
620 
621     Value *ParentValue = nullptr;
622     for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
623       BasicBlock *BB = BBAndPred.first;
624       Value *Pred = BBAndPred.second;
625 
626       if (BB == Parent) {
627         ParentValue = Pred;
628         break;
629       }
630       PhiInserter.AddAvailableValue(BB, Pred);
631       Dominator.addAndRememberBlock(BB);
632     }
633 
634     if (ParentValue) {
635       Term->setCondition(ParentValue);
636     } else {
637       if (!Dominator.resultIsRememberedBlock())
638         PhiInserter.AddAvailableValue(Dominator.result(), Default);
639 
640       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
641     }
642   }
643 }
644 
645 /// Simplify any inverted conditions that were built by buildConditions.
646 void StructurizeCFG::simplifyConditions() {
647   SmallVector<Instruction *> InstToErase;
648   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
649     auto &Preds = I.second;
650     for (auto &J : Preds) {
651       auto &Cond = J.second;
652       Instruction *Inverted;
653       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
654           !Cond->use_empty()) {
655         if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
656           InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
657           Cond->replaceAllUsesWith(InvertedCmp);
658           InstToErase.push_back(cast<Instruction>(Cond));
659         }
660       }
661     }
662   }
663   for (auto *I : InstToErase)
664     I->eraseFromParent();
665 }
666 
667 /// Remove all PHI values coming from "From" into "To" and remember
668 /// them in DeletedPhis
669 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
670   PhiMap &Map = DeletedPhis[To];
671   for (PHINode &Phi : To->phis()) {
672     bool Recorded = false;
673     while (Phi.getBasicBlockIndex(From) != -1) {
674       Value *Deleted = Phi.removeIncomingValue(From, false);
675       Map[&Phi].push_back(std::make_pair(From, Deleted));
676       if (!Recorded) {
677         AffectedPhis.push_back(&Phi);
678         Recorded = true;
679       }
680     }
681   }
682 }
683 
684 /// Add a dummy PHI value as soon as we knew the new predecessor
685 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
686   for (PHINode &Phi : To->phis()) {
687     Value *Undef = UndefValue::get(Phi.getType());
688     Phi.addIncoming(Undef, From);
689   }
690   AddedPhis[To].push_back(From);
691 }
692 
693 /// Add the real PHI value as soon as everything is set up
694 void StructurizeCFG::setPhiValues() {
695   SmallVector<PHINode *, 8> InsertedPhis;
696   SSAUpdater Updater(&InsertedPhis);
697   for (const auto &AddedPhi : AddedPhis) {
698     BasicBlock *To = AddedPhi.first;
699     const BBVector &From = AddedPhi.second;
700 
701     if (!DeletedPhis.count(To))
702       continue;
703 
704     PhiMap &Map = DeletedPhis[To];
705     for (const auto &PI : Map) {
706       PHINode *Phi = PI.first;
707       Value *Undef = UndefValue::get(Phi->getType());
708       Updater.Initialize(Phi->getType(), "");
709       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
710       Updater.AddAvailableValue(To, Undef);
711 
712       NearestCommonDominator Dominator(DT);
713       Dominator.addBlock(To);
714       for (const auto &VI : PI.second) {
715         Updater.AddAvailableValue(VI.first, VI.second);
716         Dominator.addAndRememberBlock(VI.first);
717       }
718 
719       if (!Dominator.resultIsRememberedBlock())
720         Updater.AddAvailableValue(Dominator.result(), Undef);
721 
722       for (BasicBlock *FI : From)
723         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
724       AffectedPhis.push_back(Phi);
725     }
726 
727     DeletedPhis.erase(To);
728   }
729   assert(DeletedPhis.empty());
730 
731   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
732 }
733 
734 void StructurizeCFG::simplifyAffectedPhis() {
735   bool Changed;
736   do {
737     Changed = false;
738     SimplifyQuery Q(Func->getParent()->getDataLayout());
739     Q.DT = DT;
740     for (WeakVH VH : AffectedPhis) {
741       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
742         if (auto NewValue = simplifyInstruction(Phi, Q)) {
743           Phi->replaceAllUsesWith(NewValue);
744           Phi->eraseFromParent();
745           Changed = true;
746         }
747       }
748     }
749   } while (Changed);
750 }
751 
752 /// Remove phi values from all successors and then remove the terminator.
753 void StructurizeCFG::killTerminator(BasicBlock *BB) {
754   Instruction *Term = BB->getTerminator();
755   if (!Term)
756     return;
757 
758   for (BasicBlock *Succ : successors(BB))
759     delPhiValues(BB, Succ);
760 
761   if (DA)
762     DA->removeValue(Term);
763   Term->eraseFromParent();
764 }
765 
766 /// Let node exit(s) point to NewExit
767 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
768                                 bool IncludeDominator) {
769   if (Node->isSubRegion()) {
770     Region *SubRegion = Node->getNodeAs<Region>();
771     BasicBlock *OldExit = SubRegion->getExit();
772     BasicBlock *Dominator = nullptr;
773 
774     // Find all the edges from the sub region to the exit.
775     // We use make_early_inc_range here because we modify BB's terminator.
776     for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
777       if (!SubRegion->contains(BB))
778         continue;
779 
780       // Modify the edges to point to the new exit
781       delPhiValues(BB, OldExit);
782       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
783       addPhiValues(BB, NewExit);
784 
785       // Find the new dominator (if requested)
786       if (IncludeDominator) {
787         if (!Dominator)
788           Dominator = BB;
789         else
790           Dominator = DT->findNearestCommonDominator(Dominator, BB);
791       }
792     }
793 
794     // Change the dominator (if requested)
795     if (Dominator)
796       DT->changeImmediateDominator(NewExit, Dominator);
797 
798     // Update the region info
799     SubRegion->replaceExit(NewExit);
800   } else {
801     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
802     killTerminator(BB);
803     BranchInst::Create(NewExit, BB);
804     addPhiValues(BB, NewExit);
805     if (IncludeDominator)
806       DT->changeImmediateDominator(NewExit, BB);
807   }
808 }
809 
810 /// Create a new flow node and update dominator tree and region info
811 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
812   LLVMContext &Context = Func->getContext();
813   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
814                        Order.back()->getEntry();
815   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
816                                         Func, Insert);
817   DT->addNewBlock(Flow, Dominator);
818   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
819   return Flow;
820 }
821 
822 /// Create a new or reuse the previous node as flow node
823 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
824   BasicBlock *Entry = PrevNode->getEntry();
825 
826   if (!PrevNode->isSubRegion()) {
827     killTerminator(Entry);
828     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
829       return Entry;
830   }
831 
832   // create a new flow node
833   BasicBlock *Flow = getNextFlow(Entry);
834 
835   // and wire it up
836   changeExit(PrevNode, Flow, true);
837   PrevNode = ParentRegion->getBBNode(Flow);
838   return Flow;
839 }
840 
841 /// Returns the region exit if possible, otherwise just a new flow node
842 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
843                                         bool ExitUseAllowed) {
844   if (!Order.empty() || !ExitUseAllowed)
845     return getNextFlow(Flow);
846 
847   BasicBlock *Exit = ParentRegion->getExit();
848   DT->changeImmediateDominator(Exit, Flow);
849   addPhiValues(Flow, Exit);
850   return Exit;
851 }
852 
853 /// Set the previous node
854 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
855   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
856                                         : nullptr;
857 }
858 
859 /// Does BB dominate all the predicates of Node?
860 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
861   BBPredicates &Preds = Predicates[Node->getEntry()];
862   return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
863     return DT->dominates(BB, Pred.first);
864   });
865 }
866 
867 /// Can we predict that this node will always be called?
868 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
869   BBPredicates &Preds = Predicates[Node->getEntry()];
870   bool Dominated = false;
871 
872   // Regionentry is always true
873   if (!PrevNode)
874     return true;
875 
876   for (std::pair<BasicBlock*, Value*> Pred : Preds) {
877     BasicBlock *BB = Pred.first;
878     Value *V = Pred.second;
879 
880     if (V != BoolTrue)
881       return false;
882 
883     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
884       Dominated = true;
885   }
886 
887   // TODO: The dominator check is too strict
888   return Dominated;
889 }
890 
891 /// Take one node from the order vector and wire it up
892 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
893                               BasicBlock *LoopEnd) {
894   RegionNode *Node = Order.pop_back_val();
895   Visited.insert(Node->getEntry());
896 
897   if (isPredictableTrue(Node)) {
898     // Just a linear flow
899     if (PrevNode) {
900       changeExit(PrevNode, Node->getEntry(), true);
901     }
902     PrevNode = Node;
903   } else {
904     // Insert extra prefix node (or reuse last one)
905     BasicBlock *Flow = needPrefix(false);
906 
907     // Insert extra postfix node (or use exit instead)
908     BasicBlock *Entry = Node->getEntry();
909     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
910 
911     // let it point to entry and next block
912     Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
913     addPhiValues(Flow, Entry);
914     DT->changeImmediateDominator(Entry, Flow);
915 
916     PrevNode = Node;
917     while (!Order.empty() && !Visited.count(LoopEnd) &&
918            dominatesPredicates(Entry, Order.back())) {
919       handleLoops(false, LoopEnd);
920     }
921 
922     changeExit(PrevNode, Next, false);
923     setPrevNode(Next);
924   }
925 }
926 
927 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
928                                  BasicBlock *LoopEnd) {
929   RegionNode *Node = Order.back();
930   BasicBlock *LoopStart = Node->getEntry();
931 
932   if (!Loops.count(LoopStart)) {
933     wireFlow(ExitUseAllowed, LoopEnd);
934     return;
935   }
936 
937   if (!isPredictableTrue(Node))
938     LoopStart = needPrefix(true);
939 
940   LoopEnd = Loops[Node->getEntry()];
941   wireFlow(false, LoopEnd);
942   while (!Visited.count(LoopEnd)) {
943     handleLoops(false, LoopEnd);
944   }
945 
946   // If the start of the loop is the entry block, we can't branch to it so
947   // insert a new dummy entry block.
948   Function *LoopFunc = LoopStart->getParent();
949   if (LoopStart == &LoopFunc->getEntryBlock()) {
950     LoopStart->setName("entry.orig");
951 
952     BasicBlock *NewEntry =
953       BasicBlock::Create(LoopStart->getContext(),
954                          "entry",
955                          LoopFunc,
956                          LoopStart);
957     BranchInst::Create(LoopStart, NewEntry);
958     DT->setNewRoot(NewEntry);
959   }
960 
961   // Create an extra loop end node
962   LoopEnd = needPrefix(false);
963   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
964   LoopConds.push_back(BranchInst::Create(Next, LoopStart,
965                                          BoolUndef, LoopEnd));
966   addPhiValues(LoopEnd, LoopStart);
967   setPrevNode(Next);
968 }
969 
970 /// After this function control flow looks like it should be, but
971 /// branches and PHI nodes only have undefined conditions.
972 void StructurizeCFG::createFlow() {
973   BasicBlock *Exit = ParentRegion->getExit();
974   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
975 
976   AffectedPhis.clear();
977   DeletedPhis.clear();
978   AddedPhis.clear();
979   Conditions.clear();
980   LoopConds.clear();
981 
982   PrevNode = nullptr;
983   Visited.clear();
984 
985   while (!Order.empty()) {
986     handleLoops(EntryDominatesExit, nullptr);
987   }
988 
989   if (PrevNode)
990     changeExit(PrevNode, Exit, EntryDominatesExit);
991   else
992     assert(EntryDominatesExit);
993 }
994 
995 /// Handle a rare case where the disintegrated nodes instructions
996 /// no longer dominate all their uses. Not sure if this is really necessary
997 void StructurizeCFG::rebuildSSA() {
998   SSAUpdater Updater;
999   for (BasicBlock *BB : ParentRegion->blocks())
1000     for (Instruction &I : *BB) {
1001       bool Initialized = false;
1002       // We may modify the use list as we iterate over it, so we use
1003       // make_early_inc_range.
1004       for (Use &U : llvm::make_early_inc_range(I.uses())) {
1005         Instruction *User = cast<Instruction>(U.getUser());
1006         if (User->getParent() == BB) {
1007           continue;
1008         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1009           if (UserPN->getIncomingBlock(U) == BB)
1010             continue;
1011         }
1012 
1013         if (DT->dominates(&I, User))
1014           continue;
1015 
1016         if (!Initialized) {
1017           Value *Undef = UndefValue::get(I.getType());
1018           Updater.Initialize(I.getType(), "");
1019           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1020           Updater.AddAvailableValue(BB, &I);
1021           Initialized = true;
1022         }
1023         Updater.RewriteUseAfterInsertions(U);
1024       }
1025     }
1026 }
1027 
1028 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1029                                    const LegacyDivergenceAnalysis &DA) {
1030   // Bool for if all sub-regions are uniform.
1031   bool SubRegionsAreUniform = true;
1032   // Count of how many direct children are conditional.
1033   unsigned ConditionalDirectChildren = 0;
1034 
1035   for (auto E : R->elements()) {
1036     if (!E->isSubRegion()) {
1037       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1038       if (!Br || !Br->isConditional())
1039         continue;
1040 
1041       if (!DA.isUniform(Br))
1042         return false;
1043 
1044       // One of our direct children is conditional.
1045       ConditionalDirectChildren++;
1046 
1047       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1048                         << " has uniform terminator\n");
1049     } else {
1050       // Explicitly refuse to treat regions as uniform if they have non-uniform
1051       // subregions. We cannot rely on DivergenceAnalysis for branches in
1052       // subregions because those branches may have been removed and re-created,
1053       // so we look for our metadata instead.
1054       //
1055       // Warning: It would be nice to treat regions as uniform based only on
1056       // their direct child basic blocks' terminators, regardless of whether
1057       // subregions are uniform or not. However, this requires a very careful
1058       // look at SIAnnotateControlFlow to make sure nothing breaks there.
1059       for (auto BB : E->getNodeAs<Region>()->blocks()) {
1060         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1061         if (!Br || !Br->isConditional())
1062           continue;
1063 
1064         if (!Br->getMetadata(UniformMDKindID)) {
1065           // Early exit if we cannot have relaxed uniform regions.
1066           if (!RelaxedUniformRegions)
1067             return false;
1068 
1069           SubRegionsAreUniform = false;
1070           break;
1071         }
1072       }
1073     }
1074   }
1075 
1076   // Our region is uniform if:
1077   // 1. All conditional branches that are direct children are uniform (checked
1078   // above).
1079   // 2. And either:
1080   //   a. All sub-regions are uniform.
1081   //   b. There is one or less conditional branches among the direct children.
1082   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1083 }
1084 
1085 void StructurizeCFG::init(Region *R) {
1086   LLVMContext &Context = R->getEntry()->getContext();
1087 
1088   Boolean = Type::getInt1Ty(Context);
1089   BoolTrue = ConstantInt::getTrue(Context);
1090   BoolFalse = ConstantInt::getFalse(Context);
1091   BoolUndef = UndefValue::get(Boolean);
1092 
1093   this->DA = nullptr;
1094 }
1095 
1096 bool StructurizeCFG::makeUniformRegion(Region *R,
1097                                        LegacyDivergenceAnalysis *DA) {
1098   if (R->isTopLevelRegion())
1099     return false;
1100 
1101   this->DA = DA;
1102   // TODO: We could probably be smarter here with how we handle sub-regions.
1103   // We currently rely on the fact that metadata is set by earlier invocations
1104   // of the pass on sub-regions, and that this metadata doesn't get lost --
1105   // but we shouldn't rely on metadata for correctness!
1106   unsigned UniformMDKindID =
1107       R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1108 
1109   if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
1110     LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1111                       << '\n');
1112 
1113     // Mark all direct child block terminators as having been treated as
1114     // uniform. To account for a possible future in which non-uniform
1115     // sub-regions are treated more cleverly, indirect children are not
1116     // marked as uniform.
1117     MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1118     for (RegionNode *E : R->elements()) {
1119       if (E->isSubRegion())
1120         continue;
1121 
1122       if (Instruction *Term = E->getEntry()->getTerminator())
1123         Term->setMetadata(UniformMDKindID, MD);
1124     }
1125 
1126     return true;
1127   }
1128   return false;
1129 }
1130 
1131 /// Run the transformation for each region found
1132 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1133   if (R->isTopLevelRegion())
1134     return false;
1135 
1136   this->DT = DT;
1137 
1138   Func = R->getEntry()->getParent();
1139   ParentRegion = R;
1140 
1141   orderNodes();
1142   reorderNodes();
1143   collectInfos();
1144   createFlow();
1145   insertConditions(false);
1146   insertConditions(true);
1147   setPhiValues();
1148   simplifyConditions();
1149   simplifyAffectedPhis();
1150   rebuildSSA();
1151 
1152   // Cleanup
1153   Order.clear();
1154   Visited.clear();
1155   DeletedPhis.clear();
1156   AddedPhis.clear();
1157   Predicates.clear();
1158   Conditions.clear();
1159   Loops.clear();
1160   LoopPreds.clear();
1161   LoopConds.clear();
1162 
1163   return true;
1164 }
1165 
1166 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1167   return new StructurizeCFGLegacyPass(SkipUniformRegions);
1168 }
1169 
1170 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1171   Regions.push_back(&R);
1172   for (const auto &E : R)
1173     addRegionIntoQueue(*E, Regions);
1174 }
1175 
1176 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1177                                           FunctionAnalysisManager &AM) {
1178 
1179   bool Changed = false;
1180   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1181   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1182   std::vector<Region *> Regions;
1183   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1184   while (!Regions.empty()) {
1185     Region *R = Regions.back();
1186     StructurizeCFG SCFG;
1187     SCFG.init(R);
1188     Changed |= SCFG.run(R, DT);
1189     Regions.pop_back();
1190   }
1191   if (!Changed)
1192     return PreservedAnalyses::all();
1193   PreservedAnalyses PA;
1194   PA.preserve<DominatorTreeAnalysis>();
1195   return PA;
1196 }
1197