1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/MapVector.h"
11 #include "llvm/ADT/SCCIterator.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/Analysis/InstructionSimplify.h"
16 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
17 #include "llvm/Analysis/RegionInfo.h"
18 #include "llvm/Analysis/RegionIterator.h"
19 #include "llvm/Analysis/RegionPass.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/CFG.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueHandle.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Transforms/Scalar.h"
45 #include "llvm/Transforms/Utils.h"
46 #include "llvm/Transforms/Utils/Local.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <utility>
51 
52 using namespace llvm;
53 using namespace llvm::PatternMatch;
54 
55 #define DEBUG_TYPE "structurizecfg"
56 
57 // The name for newly created blocks.
58 static const char *const FlowBlockName = "Flow";
59 
60 namespace {
61 
62 static cl::opt<bool> ForceSkipUniformRegions(
63   "structurizecfg-skip-uniform-regions",
64   cl::Hidden,
65   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
66   cl::init(false));
67 
68 static cl::opt<bool>
69     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
70                           cl::desc("Allow relaxed uniform region checks"),
71                           cl::init(true));
72 
73 // Definition of the complex types used in this pass.
74 
75 using BBValuePair = std::pair<BasicBlock *, Value *>;
76 
77 using RNVector = SmallVector<RegionNode *, 8>;
78 using BBVector = SmallVector<BasicBlock *, 8>;
79 using BranchVector = SmallVector<BranchInst *, 8>;
80 using BBValueVector = SmallVector<BBValuePair, 2>;
81 
82 using BBSet = SmallPtrSet<BasicBlock *, 8>;
83 
84 using PhiMap = MapVector<PHINode *, BBValueVector>;
85 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
86 
87 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
88 using BBPredicates = DenseMap<BasicBlock *, Value *>;
89 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
90 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
91 
92 // A traits type that is intended to be used in graph algorithms. The graph
93 // traits starts at an entry node, and traverses the RegionNodes that are in
94 // the Nodes set.
95 struct SubGraphTraits {
96   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
97   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
98 
99   // This wraps a set of Nodes into the iterator, so we know which edges to
100   // filter out.
101   class WrappedSuccIterator
102       : public iterator_adaptor_base<
103             WrappedSuccIterator, BaseSuccIterator,
104             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
105             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
106     SmallDenseSet<RegionNode *> *Nodes;
107 
108   public:
WrappedSuccIterator(BaseSuccIterator It,SmallDenseSet<RegionNode * > * Nodes)109     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
110         : iterator_adaptor_base(It), Nodes(Nodes) {}
111 
operator *() const112     NodeRef operator*() const { return {*I, Nodes}; }
113   };
114 
filterAll__anona33d439f0111::SubGraphTraits115   static bool filterAll(const NodeRef &N) { return true; }
filterSet__anona33d439f0111::SubGraphTraits116   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
117 
118   using ChildIteratorType =
119       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
120 
getEntryNode__anona33d439f0111::SubGraphTraits121   static NodeRef getEntryNode(Region *R) {
122     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
123   }
124 
getEntryNode__anona33d439f0111::SubGraphTraits125   static NodeRef getEntryNode(NodeRef N) { return N; }
126 
children__anona33d439f0111::SubGraphTraits127   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
128     auto *filter = N.second ? &filterSet : &filterAll;
129     return make_filter_range(
130         make_range<WrappedSuccIterator>(
131             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
132             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
133         filter);
134   }
135 
child_begin__anona33d439f0111::SubGraphTraits136   static ChildIteratorType child_begin(const NodeRef &N) {
137     return children(N).begin();
138   }
139 
child_end__anona33d439f0111::SubGraphTraits140   static ChildIteratorType child_end(const NodeRef &N) {
141     return children(N).end();
142   }
143 };
144 
145 /// Finds the nearest common dominator of a set of BasicBlocks.
146 ///
147 /// For every BB you add to the set, you can specify whether we "remember" the
148 /// block.  When you get the common dominator, you can also ask whether it's one
149 /// of the blocks we remembered.
150 class NearestCommonDominator {
151   DominatorTree *DT;
152   BasicBlock *Result = nullptr;
153   bool ResultIsRemembered = false;
154 
155   /// Add BB to the resulting dominator.
addBlock(BasicBlock * BB,bool Remember)156   void addBlock(BasicBlock *BB, bool Remember) {
157     if (!Result) {
158       Result = BB;
159       ResultIsRemembered = Remember;
160       return;
161     }
162 
163     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
164     if (NewResult != Result)
165       ResultIsRemembered = false;
166     if (NewResult == BB)
167       ResultIsRemembered |= Remember;
168     Result = NewResult;
169   }
170 
171 public:
NearestCommonDominator(DominatorTree * DomTree)172   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
173 
addBlock(BasicBlock * BB)174   void addBlock(BasicBlock *BB) {
175     addBlock(BB, /* Remember = */ false);
176   }
177 
addAndRememberBlock(BasicBlock * BB)178   void addAndRememberBlock(BasicBlock *BB) {
179     addBlock(BB, /* Remember = */ true);
180   }
181 
182   /// Get the nearest common dominator of all the BBs added via addBlock() and
183   /// addAndRememberBlock().
result()184   BasicBlock *result() { return Result; }
185 
186   /// Is the BB returned by getResult() one of the blocks we added to the set
187   /// with addAndRememberBlock()?
resultIsRememberedBlock()188   bool resultIsRememberedBlock() { return ResultIsRemembered; }
189 };
190 
191 /// Transforms the control flow graph on one single entry/exit region
192 /// at a time.
193 ///
194 /// After the transform all "If"/"Then"/"Else" style control flow looks like
195 /// this:
196 ///
197 /// \verbatim
198 /// 1
199 /// ||
200 /// | |
201 /// 2 |
202 /// | /
203 /// |/
204 /// 3
205 /// ||   Where:
206 /// | |  1 = "If" block, calculates the condition
207 /// 4 |  2 = "Then" subregion, runs if the condition is true
208 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
209 /// |/   4 = "Else" optional subregion, runs if the condition is false
210 /// 5    5 = "End" block, also rejoins the control flow
211 /// \endverbatim
212 ///
213 /// Control flow is expressed as a branch where the true exit goes into the
214 /// "Then"/"Else" region, while the false exit skips the region
215 /// The condition for the optional "Else" region is expressed as a PHI node.
216 /// The incoming values of the PHI node are true for the "If" edge and false
217 /// for the "Then" edge.
218 ///
219 /// Additionally to that even complicated loops look like this:
220 ///
221 /// \verbatim
222 /// 1
223 /// ||
224 /// | |
225 /// 2 ^  Where:
226 /// | /  1 = "Entry" block
227 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
228 /// 3    3 = "Flow" block, with back edge to entry block
229 /// |
230 /// \endverbatim
231 ///
232 /// The back edge of the "Flow" block is always on the false side of the branch
233 /// while the true side continues the general flow. So the loop condition
234 /// consist of a network of PHI nodes where the true incoming values expresses
235 /// breaks and the false values expresses continue states.
236 class StructurizeCFG : public RegionPass {
237   bool SkipUniformRegions;
238 
239   Type *Boolean;
240   ConstantInt *BoolTrue;
241   ConstantInt *BoolFalse;
242   UndefValue *BoolUndef;
243 
244   Function *Func;
245   Region *ParentRegion;
246 
247   LegacyDivergenceAnalysis *DA;
248   DominatorTree *DT;
249 
250   SmallVector<RegionNode *, 8> Order;
251   BBSet Visited;
252 
253   SmallVector<WeakVH, 8> AffectedPhis;
254   BBPhiMap DeletedPhis;
255   BB2BBVecMap AddedPhis;
256 
257   PredMap Predicates;
258   BranchVector Conditions;
259 
260   BB2BBMap Loops;
261   PredMap LoopPreds;
262   BranchVector LoopConds;
263 
264   RegionNode *PrevNode;
265 
266   void orderNodes();
267 
268   void analyzeLoops(RegionNode *N);
269 
270   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
271 
272   void gatherPredicates(RegionNode *N);
273 
274   void collectInfos();
275 
276   void insertConditions(bool Loops);
277 
278   void delPhiValues(BasicBlock *From, BasicBlock *To);
279 
280   void addPhiValues(BasicBlock *From, BasicBlock *To);
281 
282   void setPhiValues();
283 
284   void simplifyAffectedPhis();
285 
286   void killTerminator(BasicBlock *BB);
287 
288   void changeExit(RegionNode *Node, BasicBlock *NewExit,
289                   bool IncludeDominator);
290 
291   BasicBlock *getNextFlow(BasicBlock *Dominator);
292 
293   BasicBlock *needPrefix(bool NeedEmpty);
294 
295   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
296 
297   void setPrevNode(BasicBlock *BB);
298 
299   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
300 
301   bool isPredictableTrue(RegionNode *Node);
302 
303   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
304 
305   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
306 
307   void createFlow();
308 
309   void rebuildSSA();
310 
311 public:
312   static char ID;
313 
StructurizeCFG(bool SkipUniformRegions_=false)314   explicit StructurizeCFG(bool SkipUniformRegions_ = false)
315       : RegionPass(ID),
316         SkipUniformRegions(SkipUniformRegions_) {
317     if (ForceSkipUniformRegions.getNumOccurrences())
318       SkipUniformRegions = ForceSkipUniformRegions.getValue();
319     initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
320   }
321 
322   bool doInitialization(Region *R, RGPassManager &RGM) override;
323 
324   bool runOnRegion(Region *R, RGPassManager &RGM) override;
325 
getPassName() const326   StringRef getPassName() const override { return "Structurize control flow"; }
327 
getAnalysisUsage(AnalysisUsage & AU) const328   void getAnalysisUsage(AnalysisUsage &AU) const override {
329     if (SkipUniformRegions)
330       AU.addRequired<LegacyDivergenceAnalysis>();
331     AU.addRequiredID(LowerSwitchID);
332     AU.addRequired<DominatorTreeWrapperPass>();
333 
334     AU.addPreserved<DominatorTreeWrapperPass>();
335     RegionPass::getAnalysisUsage(AU);
336   }
337 };
338 
339 } // end anonymous namespace
340 
341 char StructurizeCFG::ID = 0;
342 
343 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
344                       false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)345 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
346 INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
347 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
348 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
349 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
350                     false, false)
351 
352 /// Initialize the types and constants used in the pass
353 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
354   LLVMContext &Context = R->getEntry()->getContext();
355 
356   Boolean = Type::getInt1Ty(Context);
357   BoolTrue = ConstantInt::getTrue(Context);
358   BoolFalse = ConstantInt::getFalse(Context);
359   BoolUndef = UndefValue::get(Boolean);
360 
361   return false;
362 }
363 
364 /// Build up the general order of nodes, by performing a topological sort of the
365 /// parent region's nodes, while ensuring that there is no outer cycle node
366 /// between any two inner cycle nodes.
orderNodes()367 void StructurizeCFG::orderNodes() {
368   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
369                              GraphTraits<Region *>::nodes_end(ParentRegion)));
370   if (Order.empty())
371     return;
372 
373   SmallDenseSet<RegionNode *> Nodes;
374   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
375 
376   // A list of range indices of SCCs in Order, to be processed.
377   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
378   unsigned I = 0, E = Order.size();
379   while (true) {
380     // Run through all the SCCs in the subgraph starting with Entry.
381     for (auto SCCI =
382              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
383                  EntryNode);
384          !SCCI.isAtEnd(); ++SCCI) {
385       auto &SCC = *SCCI;
386 
387       // An SCC up to the size of 2, can be reduced to an entry (the last node),
388       // and a possible additional node. Therefore, it is already in order, and
389       // there is no need to add it to the work-list.
390       unsigned Size = SCC.size();
391       if (Size > 2)
392         WorkList.emplace_back(I, I + Size);
393 
394       // Add the SCC nodes to the Order array.
395       for (auto &N : SCC) {
396         assert(I < E && "SCC size mismatch!");
397         Order[I++] = N.first;
398       }
399     }
400     assert(I == E && "SCC size mismatch!");
401 
402     // If there are no more SCCs to order, then we are done.
403     if (WorkList.empty())
404       break;
405 
406     std::tie(I, E) = WorkList.pop_back_val();
407 
408     // Collect the set of nodes in the SCC's subgraph. These are only the
409     // possible child nodes; we do not add the entry (last node) otherwise we
410     // will have the same exact SCC all over again.
411     Nodes.clear();
412     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
413 
414     // Update the entry node.
415     EntryNode.first = Order[E - 1];
416     EntryNode.second = &Nodes;
417   }
418 }
419 
420 /// Determine the end of the loops
analyzeLoops(RegionNode * N)421 void StructurizeCFG::analyzeLoops(RegionNode *N) {
422   if (N->isSubRegion()) {
423     // Test for exit as back edge
424     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
425     if (Visited.count(Exit))
426       Loops[Exit] = N->getEntry();
427 
428   } else {
429     // Test for successors as back edge
430     BasicBlock *BB = N->getNodeAs<BasicBlock>();
431     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
432 
433     for (BasicBlock *Succ : Term->successors())
434       if (Visited.count(Succ))
435         Loops[Succ] = BB;
436   }
437 }
438 
439 /// Build the condition for one edge
buildCondition(BranchInst * Term,unsigned Idx,bool Invert)440 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
441                                       bool Invert) {
442   Value *Cond = Invert ? BoolFalse : BoolTrue;
443   if (Term->isConditional()) {
444     Cond = Term->getCondition();
445 
446     if (Idx != (unsigned)Invert)
447       Cond = invertCondition(Cond);
448   }
449   return Cond;
450 }
451 
452 /// Analyze the predecessors of each block and build up predicates
gatherPredicates(RegionNode * N)453 void StructurizeCFG::gatherPredicates(RegionNode *N) {
454   RegionInfo *RI = ParentRegion->getRegionInfo();
455   BasicBlock *BB = N->getEntry();
456   BBPredicates &Pred = Predicates[BB];
457   BBPredicates &LPred = LoopPreds[BB];
458 
459   for (BasicBlock *P : predecessors(BB)) {
460     // Ignore it if it's a branch from outside into our region entry
461     if (!ParentRegion->contains(P))
462       continue;
463 
464     Region *R = RI->getRegionFor(P);
465     if (R == ParentRegion) {
466       // It's a top level block in our region
467       BranchInst *Term = cast<BranchInst>(P->getTerminator());
468       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
469         BasicBlock *Succ = Term->getSuccessor(i);
470         if (Succ != BB)
471           continue;
472 
473         if (Visited.count(P)) {
474           // Normal forward edge
475           if (Term->isConditional()) {
476             // Try to treat it like an ELSE block
477             BasicBlock *Other = Term->getSuccessor(!i);
478             if (Visited.count(Other) && !Loops.count(Other) &&
479                 !Pred.count(Other) && !Pred.count(P)) {
480 
481               Pred[Other] = BoolFalse;
482               Pred[P] = BoolTrue;
483               continue;
484             }
485           }
486           Pred[P] = buildCondition(Term, i, false);
487         } else {
488           // Back edge
489           LPred[P] = buildCondition(Term, i, true);
490         }
491       }
492     } else {
493       // It's an exit from a sub region
494       while (R->getParent() != ParentRegion)
495         R = R->getParent();
496 
497       // Edge from inside a subregion to its entry, ignore it
498       if (*R == *N)
499         continue;
500 
501       BasicBlock *Entry = R->getEntry();
502       if (Visited.count(Entry))
503         Pred[Entry] = BoolTrue;
504       else
505         LPred[Entry] = BoolFalse;
506     }
507   }
508 }
509 
510 /// Collect various loop and predicate infos
collectInfos()511 void StructurizeCFG::collectInfos() {
512   // Reset predicate
513   Predicates.clear();
514 
515   // and loop infos
516   Loops.clear();
517   LoopPreds.clear();
518 
519   // Reset the visited nodes
520   Visited.clear();
521 
522   for (RegionNode *RN : reverse(Order)) {
523     LLVM_DEBUG(dbgs() << "Visiting: "
524                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
525                       << RN->getEntry()->getName() << "\n");
526 
527     // Analyze all the conditions leading to a node
528     gatherPredicates(RN);
529 
530     // Remember that we've seen this node
531     Visited.insert(RN->getEntry());
532 
533     // Find the last back edges
534     analyzeLoops(RN);
535   }
536 }
537 
538 /// Insert the missing branch conditions
insertConditions(bool Loops)539 void StructurizeCFG::insertConditions(bool Loops) {
540   BranchVector &Conds = Loops ? LoopConds : Conditions;
541   Value *Default = Loops ? BoolTrue : BoolFalse;
542   SSAUpdater PhiInserter;
543 
544   for (BranchInst *Term : Conds) {
545     assert(Term->isConditional());
546 
547     BasicBlock *Parent = Term->getParent();
548     BasicBlock *SuccTrue = Term->getSuccessor(0);
549     BasicBlock *SuccFalse = Term->getSuccessor(1);
550 
551     PhiInserter.Initialize(Boolean, "");
552     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
553     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
554 
555     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
556 
557     NearestCommonDominator Dominator(DT);
558     Dominator.addBlock(Parent);
559 
560     Value *ParentValue = nullptr;
561     for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
562       BasicBlock *BB = BBAndPred.first;
563       Value *Pred = BBAndPred.second;
564 
565       if (BB == Parent) {
566         ParentValue = Pred;
567         break;
568       }
569       PhiInserter.AddAvailableValue(BB, Pred);
570       Dominator.addAndRememberBlock(BB);
571     }
572 
573     if (ParentValue) {
574       Term->setCondition(ParentValue);
575     } else {
576       if (!Dominator.resultIsRememberedBlock())
577         PhiInserter.AddAvailableValue(Dominator.result(), Default);
578 
579       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
580     }
581   }
582 }
583 
584 /// Remove all PHI values coming from "From" into "To" and remember
585 /// them in DeletedPhis
delPhiValues(BasicBlock * From,BasicBlock * To)586 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
587   PhiMap &Map = DeletedPhis[To];
588   for (PHINode &Phi : To->phis()) {
589     bool Recorded = false;
590     while (Phi.getBasicBlockIndex(From) != -1) {
591       Value *Deleted = Phi.removeIncomingValue(From, false);
592       Map[&Phi].push_back(std::make_pair(From, Deleted));
593       if (!Recorded) {
594         AffectedPhis.push_back(&Phi);
595         Recorded = true;
596       }
597     }
598   }
599 }
600 
601 /// Add a dummy PHI value as soon as we knew the new predecessor
addPhiValues(BasicBlock * From,BasicBlock * To)602 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
603   for (PHINode &Phi : To->phis()) {
604     Value *Undef = UndefValue::get(Phi.getType());
605     Phi.addIncoming(Undef, From);
606   }
607   AddedPhis[To].push_back(From);
608 }
609 
610 /// Add the real PHI value as soon as everything is set up
setPhiValues()611 void StructurizeCFG::setPhiValues() {
612   SmallVector<PHINode *, 8> InsertedPhis;
613   SSAUpdater Updater(&InsertedPhis);
614   for (const auto &AddedPhi : AddedPhis) {
615     BasicBlock *To = AddedPhi.first;
616     const BBVector &From = AddedPhi.second;
617 
618     if (!DeletedPhis.count(To))
619       continue;
620 
621     PhiMap &Map = DeletedPhis[To];
622     for (const auto &PI : Map) {
623       PHINode *Phi = PI.first;
624       Value *Undef = UndefValue::get(Phi->getType());
625       Updater.Initialize(Phi->getType(), "");
626       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
627       Updater.AddAvailableValue(To, Undef);
628 
629       NearestCommonDominator Dominator(DT);
630       Dominator.addBlock(To);
631       for (const auto &VI : PI.second) {
632         Updater.AddAvailableValue(VI.first, VI.second);
633         Dominator.addAndRememberBlock(VI.first);
634       }
635 
636       if (!Dominator.resultIsRememberedBlock())
637         Updater.AddAvailableValue(Dominator.result(), Undef);
638 
639       for (BasicBlock *FI : From)
640         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
641       AffectedPhis.push_back(Phi);
642     }
643 
644     DeletedPhis.erase(To);
645   }
646   assert(DeletedPhis.empty());
647 
648   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
649 }
650 
simplifyAffectedPhis()651 void StructurizeCFG::simplifyAffectedPhis() {
652   bool Changed;
653   do {
654     Changed = false;
655     SimplifyQuery Q(Func->getParent()->getDataLayout());
656     Q.DT = DT;
657     for (WeakVH VH : AffectedPhis) {
658       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
659         if (auto NewValue = SimplifyInstruction(Phi, Q)) {
660           Phi->replaceAllUsesWith(NewValue);
661           Phi->eraseFromParent();
662           Changed = true;
663         }
664       }
665     }
666   } while (Changed);
667 }
668 
669 /// Remove phi values from all successors and then remove the terminator.
killTerminator(BasicBlock * BB)670 void StructurizeCFG::killTerminator(BasicBlock *BB) {
671   Instruction *Term = BB->getTerminator();
672   if (!Term)
673     return;
674 
675   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
676        SI != SE; ++SI)
677     delPhiValues(BB, *SI);
678 
679   if (DA)
680     DA->removeValue(Term);
681   Term->eraseFromParent();
682 }
683 
684 /// Let node exit(s) point to NewExit
changeExit(RegionNode * Node,BasicBlock * NewExit,bool IncludeDominator)685 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
686                                 bool IncludeDominator) {
687   if (Node->isSubRegion()) {
688     Region *SubRegion = Node->getNodeAs<Region>();
689     BasicBlock *OldExit = SubRegion->getExit();
690     BasicBlock *Dominator = nullptr;
691 
692     // Find all the edges from the sub region to the exit
693     for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) {
694       // Incrememt BBI before mucking with BB's terminator.
695       BasicBlock *BB = *BBI++;
696 
697       if (!SubRegion->contains(BB))
698         continue;
699 
700       // Modify the edges to point to the new exit
701       delPhiValues(BB, OldExit);
702       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
703       addPhiValues(BB, NewExit);
704 
705       // Find the new dominator (if requested)
706       if (IncludeDominator) {
707         if (!Dominator)
708           Dominator = BB;
709         else
710           Dominator = DT->findNearestCommonDominator(Dominator, BB);
711       }
712     }
713 
714     // Change the dominator (if requested)
715     if (Dominator)
716       DT->changeImmediateDominator(NewExit, Dominator);
717 
718     // Update the region info
719     SubRegion->replaceExit(NewExit);
720   } else {
721     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
722     killTerminator(BB);
723     BranchInst::Create(NewExit, BB);
724     addPhiValues(BB, NewExit);
725     if (IncludeDominator)
726       DT->changeImmediateDominator(NewExit, BB);
727   }
728 }
729 
730 /// Create a new flow node and update dominator tree and region info
getNextFlow(BasicBlock * Dominator)731 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
732   LLVMContext &Context = Func->getContext();
733   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
734                        Order.back()->getEntry();
735   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
736                                         Func, Insert);
737   DT->addNewBlock(Flow, Dominator);
738   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
739   return Flow;
740 }
741 
742 /// Create a new or reuse the previous node as flow node
needPrefix(bool NeedEmpty)743 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
744   BasicBlock *Entry = PrevNode->getEntry();
745 
746   if (!PrevNode->isSubRegion()) {
747     killTerminator(Entry);
748     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
749       return Entry;
750   }
751 
752   // create a new flow node
753   BasicBlock *Flow = getNextFlow(Entry);
754 
755   // and wire it up
756   changeExit(PrevNode, Flow, true);
757   PrevNode = ParentRegion->getBBNode(Flow);
758   return Flow;
759 }
760 
761 /// Returns the region exit if possible, otherwise just a new flow node
needPostfix(BasicBlock * Flow,bool ExitUseAllowed)762 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
763                                         bool ExitUseAllowed) {
764   if (!Order.empty() || !ExitUseAllowed)
765     return getNextFlow(Flow);
766 
767   BasicBlock *Exit = ParentRegion->getExit();
768   DT->changeImmediateDominator(Exit, Flow);
769   addPhiValues(Flow, Exit);
770   return Exit;
771 }
772 
773 /// Set the previous node
setPrevNode(BasicBlock * BB)774 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
775   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
776                                         : nullptr;
777 }
778 
779 /// Does BB dominate all the predicates of Node?
dominatesPredicates(BasicBlock * BB,RegionNode * Node)780 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
781   BBPredicates &Preds = Predicates[Node->getEntry()];
782   return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
783     return DT->dominates(BB, Pred.first);
784   });
785 }
786 
787 /// Can we predict that this node will always be called?
isPredictableTrue(RegionNode * Node)788 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
789   BBPredicates &Preds = Predicates[Node->getEntry()];
790   bool Dominated = false;
791 
792   // Regionentry is always true
793   if (!PrevNode)
794     return true;
795 
796   for (std::pair<BasicBlock*, Value*> Pred : Preds) {
797     BasicBlock *BB = Pred.first;
798     Value *V = Pred.second;
799 
800     if (V != BoolTrue)
801       return false;
802 
803     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
804       Dominated = true;
805   }
806 
807   // TODO: The dominator check is too strict
808   return Dominated;
809 }
810 
811 /// Take one node from the order vector and wire it up
wireFlow(bool ExitUseAllowed,BasicBlock * LoopEnd)812 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
813                               BasicBlock *LoopEnd) {
814   RegionNode *Node = Order.pop_back_val();
815   Visited.insert(Node->getEntry());
816 
817   if (isPredictableTrue(Node)) {
818     // Just a linear flow
819     if (PrevNode) {
820       changeExit(PrevNode, Node->getEntry(), true);
821     }
822     PrevNode = Node;
823   } else {
824     // Insert extra prefix node (or reuse last one)
825     BasicBlock *Flow = needPrefix(false);
826 
827     // Insert extra postfix node (or use exit instead)
828     BasicBlock *Entry = Node->getEntry();
829     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
830 
831     // let it point to entry and next block
832     Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
833     addPhiValues(Flow, Entry);
834     DT->changeImmediateDominator(Entry, Flow);
835 
836     PrevNode = Node;
837     while (!Order.empty() && !Visited.count(LoopEnd) &&
838            dominatesPredicates(Entry, Order.back())) {
839       handleLoops(false, LoopEnd);
840     }
841 
842     changeExit(PrevNode, Next, false);
843     setPrevNode(Next);
844   }
845 }
846 
handleLoops(bool ExitUseAllowed,BasicBlock * LoopEnd)847 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
848                                  BasicBlock *LoopEnd) {
849   RegionNode *Node = Order.back();
850   BasicBlock *LoopStart = Node->getEntry();
851 
852   if (!Loops.count(LoopStart)) {
853     wireFlow(ExitUseAllowed, LoopEnd);
854     return;
855   }
856 
857   if (!isPredictableTrue(Node))
858     LoopStart = needPrefix(true);
859 
860   LoopEnd = Loops[Node->getEntry()];
861   wireFlow(false, LoopEnd);
862   while (!Visited.count(LoopEnd)) {
863     handleLoops(false, LoopEnd);
864   }
865 
866   // If the start of the loop is the entry block, we can't branch to it so
867   // insert a new dummy entry block.
868   Function *LoopFunc = LoopStart->getParent();
869   if (LoopStart == &LoopFunc->getEntryBlock()) {
870     LoopStart->setName("entry.orig");
871 
872     BasicBlock *NewEntry =
873       BasicBlock::Create(LoopStart->getContext(),
874                          "entry",
875                          LoopFunc,
876                          LoopStart);
877     BranchInst::Create(LoopStart, NewEntry);
878     DT->setNewRoot(NewEntry);
879   }
880 
881   // Create an extra loop end node
882   LoopEnd = needPrefix(false);
883   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
884   LoopConds.push_back(BranchInst::Create(Next, LoopStart,
885                                          BoolUndef, LoopEnd));
886   addPhiValues(LoopEnd, LoopStart);
887   setPrevNode(Next);
888 }
889 
890 /// After this function control flow looks like it should be, but
891 /// branches and PHI nodes only have undefined conditions.
createFlow()892 void StructurizeCFG::createFlow() {
893   BasicBlock *Exit = ParentRegion->getExit();
894   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
895 
896   AffectedPhis.clear();
897   DeletedPhis.clear();
898   AddedPhis.clear();
899   Conditions.clear();
900   LoopConds.clear();
901 
902   PrevNode = nullptr;
903   Visited.clear();
904 
905   while (!Order.empty()) {
906     handleLoops(EntryDominatesExit, nullptr);
907   }
908 
909   if (PrevNode)
910     changeExit(PrevNode, Exit, EntryDominatesExit);
911   else
912     assert(EntryDominatesExit);
913 }
914 
915 /// Handle a rare case where the disintegrated nodes instructions
916 /// no longer dominate all their uses. Not sure if this is really necessary
rebuildSSA()917 void StructurizeCFG::rebuildSSA() {
918   SSAUpdater Updater;
919   for (BasicBlock *BB : ParentRegion->blocks())
920     for (Instruction &I : *BB) {
921       bool Initialized = false;
922       // We may modify the use list as we iterate over it, so be careful to
923       // compute the next element in the use list at the top of the loop.
924       for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) {
925         Use &U = *UI++;
926         Instruction *User = cast<Instruction>(U.getUser());
927         if (User->getParent() == BB) {
928           continue;
929         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
930           if (UserPN->getIncomingBlock(U) == BB)
931             continue;
932         }
933 
934         if (DT->dominates(&I, User))
935           continue;
936 
937         if (!Initialized) {
938           Value *Undef = UndefValue::get(I.getType());
939           Updater.Initialize(I.getType(), "");
940           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
941           Updater.AddAvailableValue(BB, &I);
942           Initialized = true;
943         }
944         Updater.RewriteUseAfterInsertions(U);
945       }
946     }
947 }
948 
hasOnlyUniformBranches(Region * R,unsigned UniformMDKindID,const LegacyDivergenceAnalysis & DA)949 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
950                                    const LegacyDivergenceAnalysis &DA) {
951   // Bool for if all sub-regions are uniform.
952   bool SubRegionsAreUniform = true;
953   // Count of how many direct children are conditional.
954   unsigned ConditionalDirectChildren = 0;
955 
956   for (auto E : R->elements()) {
957     if (!E->isSubRegion()) {
958       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
959       if (!Br || !Br->isConditional())
960         continue;
961 
962       if (!DA.isUniform(Br))
963         return false;
964 
965       // One of our direct children is conditional.
966       ConditionalDirectChildren++;
967 
968       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
969                         << " has uniform terminator\n");
970     } else {
971       // Explicitly refuse to treat regions as uniform if they have non-uniform
972       // subregions. We cannot rely on DivergenceAnalysis for branches in
973       // subregions because those branches may have been removed and re-created,
974       // so we look for our metadata instead.
975       //
976       // Warning: It would be nice to treat regions as uniform based only on
977       // their direct child basic blocks' terminators, regardless of whether
978       // subregions are uniform or not. However, this requires a very careful
979       // look at SIAnnotateControlFlow to make sure nothing breaks there.
980       for (auto BB : E->getNodeAs<Region>()->blocks()) {
981         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
982         if (!Br || !Br->isConditional())
983           continue;
984 
985         if (!Br->getMetadata(UniformMDKindID)) {
986           // Early exit if we cannot have relaxed uniform regions.
987           if (!RelaxedUniformRegions)
988             return false;
989 
990           SubRegionsAreUniform = false;
991           break;
992         }
993       }
994     }
995   }
996 
997   // Our region is uniform if:
998   // 1. All conditional branches that are direct children are uniform (checked
999   // above).
1000   // 2. And either:
1001   //   a. All sub-regions are uniform.
1002   //   b. There is one or less conditional branches among the direct children.
1003   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1004 }
1005 
1006 /// Run the transformation for each region found
runOnRegion(Region * R,RGPassManager & RGM)1007 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
1008   if (R->isTopLevelRegion())
1009     return false;
1010 
1011   DA = nullptr;
1012 
1013   if (SkipUniformRegions) {
1014     // TODO: We could probably be smarter here with how we handle sub-regions.
1015     // We currently rely on the fact that metadata is set by earlier invocations
1016     // of the pass on sub-regions, and that this metadata doesn't get lost --
1017     // but we shouldn't rely on metadata for correctness!
1018     unsigned UniformMDKindID =
1019         R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1020     DA = &getAnalysis<LegacyDivergenceAnalysis>();
1021 
1022     if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
1023       LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1024                         << '\n');
1025 
1026       // Mark all direct child block terminators as having been treated as
1027       // uniform. To account for a possible future in which non-uniform
1028       // sub-regions are treated more cleverly, indirect children are not
1029       // marked as uniform.
1030       MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1031       for (RegionNode *E : R->elements()) {
1032         if (E->isSubRegion())
1033           continue;
1034 
1035         if (Instruction *Term = E->getEntry()->getTerminator())
1036           Term->setMetadata(UniformMDKindID, MD);
1037       }
1038 
1039       return false;
1040     }
1041   }
1042 
1043   Func = R->getEntry()->getParent();
1044   ParentRegion = R;
1045 
1046   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1047 
1048   orderNodes();
1049   collectInfos();
1050   createFlow();
1051   insertConditions(false);
1052   insertConditions(true);
1053   setPhiValues();
1054   simplifyAffectedPhis();
1055   rebuildSSA();
1056 
1057   // Cleanup
1058   Order.clear();
1059   Visited.clear();
1060   DeletedPhis.clear();
1061   AddedPhis.clear();
1062   Predicates.clear();
1063   Conditions.clear();
1064   Loops.clear();
1065   LoopPreds.clear();
1066   LoopConds.clear();
1067 
1068   return true;
1069 }
1070 
createStructurizeCFGPass(bool SkipUniformRegions)1071 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1072   return new StructurizeCFG(SkipUniformRegions);
1073 }
1074