1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/MapVector.h"
11 #include "llvm/ADT/SCCIterator.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/Analysis/InstructionSimplify.h"
16 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
17 #include "llvm/Analysis/RegionInfo.h"
18 #include "llvm/Analysis/RegionIterator.h"
19 #include "llvm/Analysis/RegionPass.h"
20 #include "llvm/IR/Argument.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/CFG.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueHandle.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Transforms/Scalar.h"
45 #include "llvm/Transforms/Utils.h"
46 #include "llvm/Transforms/Utils/Local.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <utility>
51
52 using namespace llvm;
53 using namespace llvm::PatternMatch;
54
55 #define DEBUG_TYPE "structurizecfg"
56
57 // The name for newly created blocks.
58 static const char *const FlowBlockName = "Flow";
59
60 namespace {
61
62 static cl::opt<bool> ForceSkipUniformRegions(
63 "structurizecfg-skip-uniform-regions",
64 cl::Hidden,
65 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
66 cl::init(false));
67
68 static cl::opt<bool>
69 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
70 cl::desc("Allow relaxed uniform region checks"),
71 cl::init(true));
72
73 // Definition of the complex types used in this pass.
74
75 using BBValuePair = std::pair<BasicBlock *, Value *>;
76
77 using RNVector = SmallVector<RegionNode *, 8>;
78 using BBVector = SmallVector<BasicBlock *, 8>;
79 using BranchVector = SmallVector<BranchInst *, 8>;
80 using BBValueVector = SmallVector<BBValuePair, 2>;
81
82 using BBSet = SmallPtrSet<BasicBlock *, 8>;
83
84 using PhiMap = MapVector<PHINode *, BBValueVector>;
85 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
86
87 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
88 using BBPredicates = DenseMap<BasicBlock *, Value *>;
89 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
90 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
91
92 // A traits type that is intended to be used in graph algorithms. The graph
93 // traits starts at an entry node, and traverses the RegionNodes that are in
94 // the Nodes set.
95 struct SubGraphTraits {
96 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
97 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
98
99 // This wraps a set of Nodes into the iterator, so we know which edges to
100 // filter out.
101 class WrappedSuccIterator
102 : public iterator_adaptor_base<
103 WrappedSuccIterator, BaseSuccIterator,
104 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
105 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
106 SmallDenseSet<RegionNode *> *Nodes;
107
108 public:
WrappedSuccIterator(BaseSuccIterator It,SmallDenseSet<RegionNode * > * Nodes)109 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
110 : iterator_adaptor_base(It), Nodes(Nodes) {}
111
operator *() const112 NodeRef operator*() const { return {*I, Nodes}; }
113 };
114
filterAll__anona33d439f0111::SubGraphTraits115 static bool filterAll(const NodeRef &N) { return true; }
filterSet__anona33d439f0111::SubGraphTraits116 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
117
118 using ChildIteratorType =
119 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
120
getEntryNode__anona33d439f0111::SubGraphTraits121 static NodeRef getEntryNode(Region *R) {
122 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
123 }
124
getEntryNode__anona33d439f0111::SubGraphTraits125 static NodeRef getEntryNode(NodeRef N) { return N; }
126
children__anona33d439f0111::SubGraphTraits127 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
128 auto *filter = N.second ? &filterSet : &filterAll;
129 return make_filter_range(
130 make_range<WrappedSuccIterator>(
131 {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
132 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
133 filter);
134 }
135
child_begin__anona33d439f0111::SubGraphTraits136 static ChildIteratorType child_begin(const NodeRef &N) {
137 return children(N).begin();
138 }
139
child_end__anona33d439f0111::SubGraphTraits140 static ChildIteratorType child_end(const NodeRef &N) {
141 return children(N).end();
142 }
143 };
144
145 /// Finds the nearest common dominator of a set of BasicBlocks.
146 ///
147 /// For every BB you add to the set, you can specify whether we "remember" the
148 /// block. When you get the common dominator, you can also ask whether it's one
149 /// of the blocks we remembered.
150 class NearestCommonDominator {
151 DominatorTree *DT;
152 BasicBlock *Result = nullptr;
153 bool ResultIsRemembered = false;
154
155 /// Add BB to the resulting dominator.
addBlock(BasicBlock * BB,bool Remember)156 void addBlock(BasicBlock *BB, bool Remember) {
157 if (!Result) {
158 Result = BB;
159 ResultIsRemembered = Remember;
160 return;
161 }
162
163 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
164 if (NewResult != Result)
165 ResultIsRemembered = false;
166 if (NewResult == BB)
167 ResultIsRemembered |= Remember;
168 Result = NewResult;
169 }
170
171 public:
NearestCommonDominator(DominatorTree * DomTree)172 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
173
addBlock(BasicBlock * BB)174 void addBlock(BasicBlock *BB) {
175 addBlock(BB, /* Remember = */ false);
176 }
177
addAndRememberBlock(BasicBlock * BB)178 void addAndRememberBlock(BasicBlock *BB) {
179 addBlock(BB, /* Remember = */ true);
180 }
181
182 /// Get the nearest common dominator of all the BBs added via addBlock() and
183 /// addAndRememberBlock().
result()184 BasicBlock *result() { return Result; }
185
186 /// Is the BB returned by getResult() one of the blocks we added to the set
187 /// with addAndRememberBlock()?
resultIsRememberedBlock()188 bool resultIsRememberedBlock() { return ResultIsRemembered; }
189 };
190
191 /// Transforms the control flow graph on one single entry/exit region
192 /// at a time.
193 ///
194 /// After the transform all "If"/"Then"/"Else" style control flow looks like
195 /// this:
196 ///
197 /// \verbatim
198 /// 1
199 /// ||
200 /// | |
201 /// 2 |
202 /// | /
203 /// |/
204 /// 3
205 /// || Where:
206 /// | | 1 = "If" block, calculates the condition
207 /// 4 | 2 = "Then" subregion, runs if the condition is true
208 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
209 /// |/ 4 = "Else" optional subregion, runs if the condition is false
210 /// 5 5 = "End" block, also rejoins the control flow
211 /// \endverbatim
212 ///
213 /// Control flow is expressed as a branch where the true exit goes into the
214 /// "Then"/"Else" region, while the false exit skips the region
215 /// The condition for the optional "Else" region is expressed as a PHI node.
216 /// The incoming values of the PHI node are true for the "If" edge and false
217 /// for the "Then" edge.
218 ///
219 /// Additionally to that even complicated loops look like this:
220 ///
221 /// \verbatim
222 /// 1
223 /// ||
224 /// | |
225 /// 2 ^ Where:
226 /// | / 1 = "Entry" block
227 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
228 /// 3 3 = "Flow" block, with back edge to entry block
229 /// |
230 /// \endverbatim
231 ///
232 /// The back edge of the "Flow" block is always on the false side of the branch
233 /// while the true side continues the general flow. So the loop condition
234 /// consist of a network of PHI nodes where the true incoming values expresses
235 /// breaks and the false values expresses continue states.
236 class StructurizeCFG : public RegionPass {
237 bool SkipUniformRegions;
238
239 Type *Boolean;
240 ConstantInt *BoolTrue;
241 ConstantInt *BoolFalse;
242 UndefValue *BoolUndef;
243
244 Function *Func;
245 Region *ParentRegion;
246
247 LegacyDivergenceAnalysis *DA;
248 DominatorTree *DT;
249
250 SmallVector<RegionNode *, 8> Order;
251 BBSet Visited;
252
253 SmallVector<WeakVH, 8> AffectedPhis;
254 BBPhiMap DeletedPhis;
255 BB2BBVecMap AddedPhis;
256
257 PredMap Predicates;
258 BranchVector Conditions;
259
260 BB2BBMap Loops;
261 PredMap LoopPreds;
262 BranchVector LoopConds;
263
264 RegionNode *PrevNode;
265
266 void orderNodes();
267
268 void analyzeLoops(RegionNode *N);
269
270 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
271
272 void gatherPredicates(RegionNode *N);
273
274 void collectInfos();
275
276 void insertConditions(bool Loops);
277
278 void delPhiValues(BasicBlock *From, BasicBlock *To);
279
280 void addPhiValues(BasicBlock *From, BasicBlock *To);
281
282 void setPhiValues();
283
284 void simplifyAffectedPhis();
285
286 void killTerminator(BasicBlock *BB);
287
288 void changeExit(RegionNode *Node, BasicBlock *NewExit,
289 bool IncludeDominator);
290
291 BasicBlock *getNextFlow(BasicBlock *Dominator);
292
293 BasicBlock *needPrefix(bool NeedEmpty);
294
295 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
296
297 void setPrevNode(BasicBlock *BB);
298
299 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
300
301 bool isPredictableTrue(RegionNode *Node);
302
303 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
304
305 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
306
307 void createFlow();
308
309 void rebuildSSA();
310
311 public:
312 static char ID;
313
StructurizeCFG(bool SkipUniformRegions_=false)314 explicit StructurizeCFG(bool SkipUniformRegions_ = false)
315 : RegionPass(ID),
316 SkipUniformRegions(SkipUniformRegions_) {
317 if (ForceSkipUniformRegions.getNumOccurrences())
318 SkipUniformRegions = ForceSkipUniformRegions.getValue();
319 initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
320 }
321
322 bool doInitialization(Region *R, RGPassManager &RGM) override;
323
324 bool runOnRegion(Region *R, RGPassManager &RGM) override;
325
getPassName() const326 StringRef getPassName() const override { return "Structurize control flow"; }
327
getAnalysisUsage(AnalysisUsage & AU) const328 void getAnalysisUsage(AnalysisUsage &AU) const override {
329 if (SkipUniformRegions)
330 AU.addRequired<LegacyDivergenceAnalysis>();
331 AU.addRequiredID(LowerSwitchID);
332 AU.addRequired<DominatorTreeWrapperPass>();
333
334 AU.addPreserved<DominatorTreeWrapperPass>();
335 RegionPass::getAnalysisUsage(AU);
336 }
337 };
338
339 } // end anonymous namespace
340
341 char StructurizeCFG::ID = 0;
342
343 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
344 false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)345 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
346 INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
347 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
348 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
349 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
350 false, false)
351
352 /// Initialize the types and constants used in the pass
353 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
354 LLVMContext &Context = R->getEntry()->getContext();
355
356 Boolean = Type::getInt1Ty(Context);
357 BoolTrue = ConstantInt::getTrue(Context);
358 BoolFalse = ConstantInt::getFalse(Context);
359 BoolUndef = UndefValue::get(Boolean);
360
361 return false;
362 }
363
364 /// Build up the general order of nodes, by performing a topological sort of the
365 /// parent region's nodes, while ensuring that there is no outer cycle node
366 /// between any two inner cycle nodes.
orderNodes()367 void StructurizeCFG::orderNodes() {
368 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
369 GraphTraits<Region *>::nodes_end(ParentRegion)));
370 if (Order.empty())
371 return;
372
373 SmallDenseSet<RegionNode *> Nodes;
374 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
375
376 // A list of range indices of SCCs in Order, to be processed.
377 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
378 unsigned I = 0, E = Order.size();
379 while (true) {
380 // Run through all the SCCs in the subgraph starting with Entry.
381 for (auto SCCI =
382 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
383 EntryNode);
384 !SCCI.isAtEnd(); ++SCCI) {
385 auto &SCC = *SCCI;
386
387 // An SCC up to the size of 2, can be reduced to an entry (the last node),
388 // and a possible additional node. Therefore, it is already in order, and
389 // there is no need to add it to the work-list.
390 unsigned Size = SCC.size();
391 if (Size > 2)
392 WorkList.emplace_back(I, I + Size);
393
394 // Add the SCC nodes to the Order array.
395 for (auto &N : SCC) {
396 assert(I < E && "SCC size mismatch!");
397 Order[I++] = N.first;
398 }
399 }
400 assert(I == E && "SCC size mismatch!");
401
402 // If there are no more SCCs to order, then we are done.
403 if (WorkList.empty())
404 break;
405
406 std::tie(I, E) = WorkList.pop_back_val();
407
408 // Collect the set of nodes in the SCC's subgraph. These are only the
409 // possible child nodes; we do not add the entry (last node) otherwise we
410 // will have the same exact SCC all over again.
411 Nodes.clear();
412 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
413
414 // Update the entry node.
415 EntryNode.first = Order[E - 1];
416 EntryNode.second = &Nodes;
417 }
418 }
419
420 /// Determine the end of the loops
analyzeLoops(RegionNode * N)421 void StructurizeCFG::analyzeLoops(RegionNode *N) {
422 if (N->isSubRegion()) {
423 // Test for exit as back edge
424 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
425 if (Visited.count(Exit))
426 Loops[Exit] = N->getEntry();
427
428 } else {
429 // Test for successors as back edge
430 BasicBlock *BB = N->getNodeAs<BasicBlock>();
431 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
432
433 for (BasicBlock *Succ : Term->successors())
434 if (Visited.count(Succ))
435 Loops[Succ] = BB;
436 }
437 }
438
439 /// Build the condition for one edge
buildCondition(BranchInst * Term,unsigned Idx,bool Invert)440 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
441 bool Invert) {
442 Value *Cond = Invert ? BoolFalse : BoolTrue;
443 if (Term->isConditional()) {
444 Cond = Term->getCondition();
445
446 if (Idx != (unsigned)Invert)
447 Cond = invertCondition(Cond);
448 }
449 return Cond;
450 }
451
452 /// Analyze the predecessors of each block and build up predicates
gatherPredicates(RegionNode * N)453 void StructurizeCFG::gatherPredicates(RegionNode *N) {
454 RegionInfo *RI = ParentRegion->getRegionInfo();
455 BasicBlock *BB = N->getEntry();
456 BBPredicates &Pred = Predicates[BB];
457 BBPredicates &LPred = LoopPreds[BB];
458
459 for (BasicBlock *P : predecessors(BB)) {
460 // Ignore it if it's a branch from outside into our region entry
461 if (!ParentRegion->contains(P))
462 continue;
463
464 Region *R = RI->getRegionFor(P);
465 if (R == ParentRegion) {
466 // It's a top level block in our region
467 BranchInst *Term = cast<BranchInst>(P->getTerminator());
468 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
469 BasicBlock *Succ = Term->getSuccessor(i);
470 if (Succ != BB)
471 continue;
472
473 if (Visited.count(P)) {
474 // Normal forward edge
475 if (Term->isConditional()) {
476 // Try to treat it like an ELSE block
477 BasicBlock *Other = Term->getSuccessor(!i);
478 if (Visited.count(Other) && !Loops.count(Other) &&
479 !Pred.count(Other) && !Pred.count(P)) {
480
481 Pred[Other] = BoolFalse;
482 Pred[P] = BoolTrue;
483 continue;
484 }
485 }
486 Pred[P] = buildCondition(Term, i, false);
487 } else {
488 // Back edge
489 LPred[P] = buildCondition(Term, i, true);
490 }
491 }
492 } else {
493 // It's an exit from a sub region
494 while (R->getParent() != ParentRegion)
495 R = R->getParent();
496
497 // Edge from inside a subregion to its entry, ignore it
498 if (*R == *N)
499 continue;
500
501 BasicBlock *Entry = R->getEntry();
502 if (Visited.count(Entry))
503 Pred[Entry] = BoolTrue;
504 else
505 LPred[Entry] = BoolFalse;
506 }
507 }
508 }
509
510 /// Collect various loop and predicate infos
collectInfos()511 void StructurizeCFG::collectInfos() {
512 // Reset predicate
513 Predicates.clear();
514
515 // and loop infos
516 Loops.clear();
517 LoopPreds.clear();
518
519 // Reset the visited nodes
520 Visited.clear();
521
522 for (RegionNode *RN : reverse(Order)) {
523 LLVM_DEBUG(dbgs() << "Visiting: "
524 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
525 << RN->getEntry()->getName() << "\n");
526
527 // Analyze all the conditions leading to a node
528 gatherPredicates(RN);
529
530 // Remember that we've seen this node
531 Visited.insert(RN->getEntry());
532
533 // Find the last back edges
534 analyzeLoops(RN);
535 }
536 }
537
538 /// Insert the missing branch conditions
insertConditions(bool Loops)539 void StructurizeCFG::insertConditions(bool Loops) {
540 BranchVector &Conds = Loops ? LoopConds : Conditions;
541 Value *Default = Loops ? BoolTrue : BoolFalse;
542 SSAUpdater PhiInserter;
543
544 for (BranchInst *Term : Conds) {
545 assert(Term->isConditional());
546
547 BasicBlock *Parent = Term->getParent();
548 BasicBlock *SuccTrue = Term->getSuccessor(0);
549 BasicBlock *SuccFalse = Term->getSuccessor(1);
550
551 PhiInserter.Initialize(Boolean, "");
552 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
553 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
554
555 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
556
557 NearestCommonDominator Dominator(DT);
558 Dominator.addBlock(Parent);
559
560 Value *ParentValue = nullptr;
561 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
562 BasicBlock *BB = BBAndPred.first;
563 Value *Pred = BBAndPred.second;
564
565 if (BB == Parent) {
566 ParentValue = Pred;
567 break;
568 }
569 PhiInserter.AddAvailableValue(BB, Pred);
570 Dominator.addAndRememberBlock(BB);
571 }
572
573 if (ParentValue) {
574 Term->setCondition(ParentValue);
575 } else {
576 if (!Dominator.resultIsRememberedBlock())
577 PhiInserter.AddAvailableValue(Dominator.result(), Default);
578
579 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
580 }
581 }
582 }
583
584 /// Remove all PHI values coming from "From" into "To" and remember
585 /// them in DeletedPhis
delPhiValues(BasicBlock * From,BasicBlock * To)586 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
587 PhiMap &Map = DeletedPhis[To];
588 for (PHINode &Phi : To->phis()) {
589 bool Recorded = false;
590 while (Phi.getBasicBlockIndex(From) != -1) {
591 Value *Deleted = Phi.removeIncomingValue(From, false);
592 Map[&Phi].push_back(std::make_pair(From, Deleted));
593 if (!Recorded) {
594 AffectedPhis.push_back(&Phi);
595 Recorded = true;
596 }
597 }
598 }
599 }
600
601 /// Add a dummy PHI value as soon as we knew the new predecessor
addPhiValues(BasicBlock * From,BasicBlock * To)602 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
603 for (PHINode &Phi : To->phis()) {
604 Value *Undef = UndefValue::get(Phi.getType());
605 Phi.addIncoming(Undef, From);
606 }
607 AddedPhis[To].push_back(From);
608 }
609
610 /// Add the real PHI value as soon as everything is set up
setPhiValues()611 void StructurizeCFG::setPhiValues() {
612 SmallVector<PHINode *, 8> InsertedPhis;
613 SSAUpdater Updater(&InsertedPhis);
614 for (const auto &AddedPhi : AddedPhis) {
615 BasicBlock *To = AddedPhi.first;
616 const BBVector &From = AddedPhi.second;
617
618 if (!DeletedPhis.count(To))
619 continue;
620
621 PhiMap &Map = DeletedPhis[To];
622 for (const auto &PI : Map) {
623 PHINode *Phi = PI.first;
624 Value *Undef = UndefValue::get(Phi->getType());
625 Updater.Initialize(Phi->getType(), "");
626 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
627 Updater.AddAvailableValue(To, Undef);
628
629 NearestCommonDominator Dominator(DT);
630 Dominator.addBlock(To);
631 for (const auto &VI : PI.second) {
632 Updater.AddAvailableValue(VI.first, VI.second);
633 Dominator.addAndRememberBlock(VI.first);
634 }
635
636 if (!Dominator.resultIsRememberedBlock())
637 Updater.AddAvailableValue(Dominator.result(), Undef);
638
639 for (BasicBlock *FI : From)
640 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
641 AffectedPhis.push_back(Phi);
642 }
643
644 DeletedPhis.erase(To);
645 }
646 assert(DeletedPhis.empty());
647
648 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
649 }
650
simplifyAffectedPhis()651 void StructurizeCFG::simplifyAffectedPhis() {
652 bool Changed;
653 do {
654 Changed = false;
655 SimplifyQuery Q(Func->getParent()->getDataLayout());
656 Q.DT = DT;
657 for (WeakVH VH : AffectedPhis) {
658 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
659 if (auto NewValue = SimplifyInstruction(Phi, Q)) {
660 Phi->replaceAllUsesWith(NewValue);
661 Phi->eraseFromParent();
662 Changed = true;
663 }
664 }
665 }
666 } while (Changed);
667 }
668
669 /// Remove phi values from all successors and then remove the terminator.
killTerminator(BasicBlock * BB)670 void StructurizeCFG::killTerminator(BasicBlock *BB) {
671 Instruction *Term = BB->getTerminator();
672 if (!Term)
673 return;
674
675 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
676 SI != SE; ++SI)
677 delPhiValues(BB, *SI);
678
679 if (DA)
680 DA->removeValue(Term);
681 Term->eraseFromParent();
682 }
683
684 /// Let node exit(s) point to NewExit
changeExit(RegionNode * Node,BasicBlock * NewExit,bool IncludeDominator)685 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
686 bool IncludeDominator) {
687 if (Node->isSubRegion()) {
688 Region *SubRegion = Node->getNodeAs<Region>();
689 BasicBlock *OldExit = SubRegion->getExit();
690 BasicBlock *Dominator = nullptr;
691
692 // Find all the edges from the sub region to the exit
693 for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) {
694 // Incrememt BBI before mucking with BB's terminator.
695 BasicBlock *BB = *BBI++;
696
697 if (!SubRegion->contains(BB))
698 continue;
699
700 // Modify the edges to point to the new exit
701 delPhiValues(BB, OldExit);
702 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
703 addPhiValues(BB, NewExit);
704
705 // Find the new dominator (if requested)
706 if (IncludeDominator) {
707 if (!Dominator)
708 Dominator = BB;
709 else
710 Dominator = DT->findNearestCommonDominator(Dominator, BB);
711 }
712 }
713
714 // Change the dominator (if requested)
715 if (Dominator)
716 DT->changeImmediateDominator(NewExit, Dominator);
717
718 // Update the region info
719 SubRegion->replaceExit(NewExit);
720 } else {
721 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
722 killTerminator(BB);
723 BranchInst::Create(NewExit, BB);
724 addPhiValues(BB, NewExit);
725 if (IncludeDominator)
726 DT->changeImmediateDominator(NewExit, BB);
727 }
728 }
729
730 /// Create a new flow node and update dominator tree and region info
getNextFlow(BasicBlock * Dominator)731 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
732 LLVMContext &Context = Func->getContext();
733 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
734 Order.back()->getEntry();
735 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
736 Func, Insert);
737 DT->addNewBlock(Flow, Dominator);
738 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
739 return Flow;
740 }
741
742 /// Create a new or reuse the previous node as flow node
needPrefix(bool NeedEmpty)743 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
744 BasicBlock *Entry = PrevNode->getEntry();
745
746 if (!PrevNode->isSubRegion()) {
747 killTerminator(Entry);
748 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
749 return Entry;
750 }
751
752 // create a new flow node
753 BasicBlock *Flow = getNextFlow(Entry);
754
755 // and wire it up
756 changeExit(PrevNode, Flow, true);
757 PrevNode = ParentRegion->getBBNode(Flow);
758 return Flow;
759 }
760
761 /// Returns the region exit if possible, otherwise just a new flow node
needPostfix(BasicBlock * Flow,bool ExitUseAllowed)762 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
763 bool ExitUseAllowed) {
764 if (!Order.empty() || !ExitUseAllowed)
765 return getNextFlow(Flow);
766
767 BasicBlock *Exit = ParentRegion->getExit();
768 DT->changeImmediateDominator(Exit, Flow);
769 addPhiValues(Flow, Exit);
770 return Exit;
771 }
772
773 /// Set the previous node
setPrevNode(BasicBlock * BB)774 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
775 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
776 : nullptr;
777 }
778
779 /// Does BB dominate all the predicates of Node?
dominatesPredicates(BasicBlock * BB,RegionNode * Node)780 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
781 BBPredicates &Preds = Predicates[Node->getEntry()];
782 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
783 return DT->dominates(BB, Pred.first);
784 });
785 }
786
787 /// Can we predict that this node will always be called?
isPredictableTrue(RegionNode * Node)788 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
789 BBPredicates &Preds = Predicates[Node->getEntry()];
790 bool Dominated = false;
791
792 // Regionentry is always true
793 if (!PrevNode)
794 return true;
795
796 for (std::pair<BasicBlock*, Value*> Pred : Preds) {
797 BasicBlock *BB = Pred.first;
798 Value *V = Pred.second;
799
800 if (V != BoolTrue)
801 return false;
802
803 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
804 Dominated = true;
805 }
806
807 // TODO: The dominator check is too strict
808 return Dominated;
809 }
810
811 /// Take one node from the order vector and wire it up
wireFlow(bool ExitUseAllowed,BasicBlock * LoopEnd)812 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
813 BasicBlock *LoopEnd) {
814 RegionNode *Node = Order.pop_back_val();
815 Visited.insert(Node->getEntry());
816
817 if (isPredictableTrue(Node)) {
818 // Just a linear flow
819 if (PrevNode) {
820 changeExit(PrevNode, Node->getEntry(), true);
821 }
822 PrevNode = Node;
823 } else {
824 // Insert extra prefix node (or reuse last one)
825 BasicBlock *Flow = needPrefix(false);
826
827 // Insert extra postfix node (or use exit instead)
828 BasicBlock *Entry = Node->getEntry();
829 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
830
831 // let it point to entry and next block
832 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
833 addPhiValues(Flow, Entry);
834 DT->changeImmediateDominator(Entry, Flow);
835
836 PrevNode = Node;
837 while (!Order.empty() && !Visited.count(LoopEnd) &&
838 dominatesPredicates(Entry, Order.back())) {
839 handleLoops(false, LoopEnd);
840 }
841
842 changeExit(PrevNode, Next, false);
843 setPrevNode(Next);
844 }
845 }
846
handleLoops(bool ExitUseAllowed,BasicBlock * LoopEnd)847 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
848 BasicBlock *LoopEnd) {
849 RegionNode *Node = Order.back();
850 BasicBlock *LoopStart = Node->getEntry();
851
852 if (!Loops.count(LoopStart)) {
853 wireFlow(ExitUseAllowed, LoopEnd);
854 return;
855 }
856
857 if (!isPredictableTrue(Node))
858 LoopStart = needPrefix(true);
859
860 LoopEnd = Loops[Node->getEntry()];
861 wireFlow(false, LoopEnd);
862 while (!Visited.count(LoopEnd)) {
863 handleLoops(false, LoopEnd);
864 }
865
866 // If the start of the loop is the entry block, we can't branch to it so
867 // insert a new dummy entry block.
868 Function *LoopFunc = LoopStart->getParent();
869 if (LoopStart == &LoopFunc->getEntryBlock()) {
870 LoopStart->setName("entry.orig");
871
872 BasicBlock *NewEntry =
873 BasicBlock::Create(LoopStart->getContext(),
874 "entry",
875 LoopFunc,
876 LoopStart);
877 BranchInst::Create(LoopStart, NewEntry);
878 DT->setNewRoot(NewEntry);
879 }
880
881 // Create an extra loop end node
882 LoopEnd = needPrefix(false);
883 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
884 LoopConds.push_back(BranchInst::Create(Next, LoopStart,
885 BoolUndef, LoopEnd));
886 addPhiValues(LoopEnd, LoopStart);
887 setPrevNode(Next);
888 }
889
890 /// After this function control flow looks like it should be, but
891 /// branches and PHI nodes only have undefined conditions.
createFlow()892 void StructurizeCFG::createFlow() {
893 BasicBlock *Exit = ParentRegion->getExit();
894 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
895
896 AffectedPhis.clear();
897 DeletedPhis.clear();
898 AddedPhis.clear();
899 Conditions.clear();
900 LoopConds.clear();
901
902 PrevNode = nullptr;
903 Visited.clear();
904
905 while (!Order.empty()) {
906 handleLoops(EntryDominatesExit, nullptr);
907 }
908
909 if (PrevNode)
910 changeExit(PrevNode, Exit, EntryDominatesExit);
911 else
912 assert(EntryDominatesExit);
913 }
914
915 /// Handle a rare case where the disintegrated nodes instructions
916 /// no longer dominate all their uses. Not sure if this is really necessary
rebuildSSA()917 void StructurizeCFG::rebuildSSA() {
918 SSAUpdater Updater;
919 for (BasicBlock *BB : ParentRegion->blocks())
920 for (Instruction &I : *BB) {
921 bool Initialized = false;
922 // We may modify the use list as we iterate over it, so be careful to
923 // compute the next element in the use list at the top of the loop.
924 for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) {
925 Use &U = *UI++;
926 Instruction *User = cast<Instruction>(U.getUser());
927 if (User->getParent() == BB) {
928 continue;
929 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
930 if (UserPN->getIncomingBlock(U) == BB)
931 continue;
932 }
933
934 if (DT->dominates(&I, User))
935 continue;
936
937 if (!Initialized) {
938 Value *Undef = UndefValue::get(I.getType());
939 Updater.Initialize(I.getType(), "");
940 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
941 Updater.AddAvailableValue(BB, &I);
942 Initialized = true;
943 }
944 Updater.RewriteUseAfterInsertions(U);
945 }
946 }
947 }
948
hasOnlyUniformBranches(Region * R,unsigned UniformMDKindID,const LegacyDivergenceAnalysis & DA)949 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
950 const LegacyDivergenceAnalysis &DA) {
951 // Bool for if all sub-regions are uniform.
952 bool SubRegionsAreUniform = true;
953 // Count of how many direct children are conditional.
954 unsigned ConditionalDirectChildren = 0;
955
956 for (auto E : R->elements()) {
957 if (!E->isSubRegion()) {
958 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
959 if (!Br || !Br->isConditional())
960 continue;
961
962 if (!DA.isUniform(Br))
963 return false;
964
965 // One of our direct children is conditional.
966 ConditionalDirectChildren++;
967
968 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
969 << " has uniform terminator\n");
970 } else {
971 // Explicitly refuse to treat regions as uniform if they have non-uniform
972 // subregions. We cannot rely on DivergenceAnalysis for branches in
973 // subregions because those branches may have been removed and re-created,
974 // so we look for our metadata instead.
975 //
976 // Warning: It would be nice to treat regions as uniform based only on
977 // their direct child basic blocks' terminators, regardless of whether
978 // subregions are uniform or not. However, this requires a very careful
979 // look at SIAnnotateControlFlow to make sure nothing breaks there.
980 for (auto BB : E->getNodeAs<Region>()->blocks()) {
981 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
982 if (!Br || !Br->isConditional())
983 continue;
984
985 if (!Br->getMetadata(UniformMDKindID)) {
986 // Early exit if we cannot have relaxed uniform regions.
987 if (!RelaxedUniformRegions)
988 return false;
989
990 SubRegionsAreUniform = false;
991 break;
992 }
993 }
994 }
995 }
996
997 // Our region is uniform if:
998 // 1. All conditional branches that are direct children are uniform (checked
999 // above).
1000 // 2. And either:
1001 // a. All sub-regions are uniform.
1002 // b. There is one or less conditional branches among the direct children.
1003 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1004 }
1005
1006 /// Run the transformation for each region found
runOnRegion(Region * R,RGPassManager & RGM)1007 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
1008 if (R->isTopLevelRegion())
1009 return false;
1010
1011 DA = nullptr;
1012
1013 if (SkipUniformRegions) {
1014 // TODO: We could probably be smarter here with how we handle sub-regions.
1015 // We currently rely on the fact that metadata is set by earlier invocations
1016 // of the pass on sub-regions, and that this metadata doesn't get lost --
1017 // but we shouldn't rely on metadata for correctness!
1018 unsigned UniformMDKindID =
1019 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1020 DA = &getAnalysis<LegacyDivergenceAnalysis>();
1021
1022 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
1023 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1024 << '\n');
1025
1026 // Mark all direct child block terminators as having been treated as
1027 // uniform. To account for a possible future in which non-uniform
1028 // sub-regions are treated more cleverly, indirect children are not
1029 // marked as uniform.
1030 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1031 for (RegionNode *E : R->elements()) {
1032 if (E->isSubRegion())
1033 continue;
1034
1035 if (Instruction *Term = E->getEntry()->getTerminator())
1036 Term->setMetadata(UniformMDKindID, MD);
1037 }
1038
1039 return false;
1040 }
1041 }
1042
1043 Func = R->getEntry()->getParent();
1044 ParentRegion = R;
1045
1046 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1047
1048 orderNodes();
1049 collectInfos();
1050 createFlow();
1051 insertConditions(false);
1052 insertConditions(true);
1053 setPhiValues();
1054 simplifyAffectedPhis();
1055 rebuildSSA();
1056
1057 // Cleanup
1058 Order.clear();
1059 Visited.clear();
1060 DeletedPhis.clear();
1061 AddedPhis.clear();
1062 Predicates.clear();
1063 Conditions.clear();
1064 Loops.clear();
1065 LoopPreds.clear();
1066 LoopConds.clear();
1067
1068 return true;
1069 }
1070
createStructurizeCFGPass(bool SkipUniformRegions)1071 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1072 return new StructurizeCFG(SkipUniformRegions);
1073 }
1074