1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/SCCIterator.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallPtrSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/Analysis/InstructionSimplify.h" 17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/CFG.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Dominators.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/InstrTypes.h" 27 #include "llvm/IR/Instruction.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Metadata.h" 30 #include "llvm/IR/PassManager.h" 31 #include "llvm/IR/PatternMatch.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Use.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/IR/ValueHandle.h" 36 #include "llvm/InitializePasses.h" 37 #include "llvm/Pass.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Transforms/Scalar.h" 43 #include "llvm/Transforms/Utils.h" 44 #include "llvm/Transforms/Utils/Local.h" 45 #include "llvm/Transforms/Utils/SSAUpdater.h" 46 #include <algorithm> 47 #include <cassert> 48 #include <utility> 49 50 using namespace llvm; 51 using namespace llvm::PatternMatch; 52 53 #define DEBUG_TYPE "structurizecfg" 54 55 // The name for newly created blocks. 56 const char FlowBlockName[] = "Flow"; 57 58 namespace { 59 60 static cl::opt<bool> ForceSkipUniformRegions( 61 "structurizecfg-skip-uniform-regions", 62 cl::Hidden, 63 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 64 cl::init(false)); 65 66 static cl::opt<bool> 67 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 68 cl::desc("Allow relaxed uniform region checks"), 69 cl::init(true)); 70 71 static cl::opt<unsigned> 72 ReorderNodeSize("structurizecfg-node-reorder-size", 73 cl::desc("Limit region size for reordering nodes"), 74 cl::init(100), cl::Hidden); 75 76 // Definition of the complex types used in this pass. 77 78 using BBValuePair = std::pair<BasicBlock *, Value *>; 79 80 using RNVector = SmallVector<RegionNode *, 8>; 81 using BBVector = SmallVector<BasicBlock *, 8>; 82 using BranchVector = SmallVector<BranchInst *, 8>; 83 using BBValueVector = SmallVector<BBValuePair, 2>; 84 85 using BBSet = SmallPtrSet<BasicBlock *, 8>; 86 87 using PhiMap = MapVector<PHINode *, BBValueVector>; 88 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 89 90 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 91 using BBPredicates = DenseMap<BasicBlock *, Value *>; 92 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 93 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 94 95 // A traits type that is intended to be used in graph algorithms. The graph 96 // traits starts at an entry node, and traverses the RegionNodes that are in 97 // the Nodes set. 98 struct SubGraphTraits { 99 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 100 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 101 102 // This wraps a set of Nodes into the iterator, so we know which edges to 103 // filter out. 104 class WrappedSuccIterator 105 : public iterator_adaptor_base< 106 WrappedSuccIterator, BaseSuccIterator, 107 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 108 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 109 SmallDenseSet<RegionNode *> *Nodes; 110 111 public: 112 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 113 : iterator_adaptor_base(It), Nodes(Nodes) {} 114 115 NodeRef operator*() const { return {*I, Nodes}; } 116 }; 117 118 static bool filterAll(const NodeRef &N) { return true; } 119 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 120 121 using ChildIteratorType = 122 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 123 124 static NodeRef getEntryNode(Region *R) { 125 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 126 } 127 128 static NodeRef getEntryNode(NodeRef N) { return N; } 129 130 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 131 auto *filter = N.second ? &filterSet : &filterAll; 132 return make_filter_range( 133 make_range<WrappedSuccIterator>( 134 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 135 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 136 filter); 137 } 138 139 static ChildIteratorType child_begin(const NodeRef &N) { 140 return children(N).begin(); 141 } 142 143 static ChildIteratorType child_end(const NodeRef &N) { 144 return children(N).end(); 145 } 146 }; 147 148 /// Finds the nearest common dominator of a set of BasicBlocks. 149 /// 150 /// For every BB you add to the set, you can specify whether we "remember" the 151 /// block. When you get the common dominator, you can also ask whether it's one 152 /// of the blocks we remembered. 153 class NearestCommonDominator { 154 DominatorTree *DT; 155 BasicBlock *Result = nullptr; 156 bool ResultIsRemembered = false; 157 158 /// Add BB to the resulting dominator. 159 void addBlock(BasicBlock *BB, bool Remember) { 160 if (!Result) { 161 Result = BB; 162 ResultIsRemembered = Remember; 163 return; 164 } 165 166 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 167 if (NewResult != Result) 168 ResultIsRemembered = false; 169 if (NewResult == BB) 170 ResultIsRemembered |= Remember; 171 Result = NewResult; 172 } 173 174 public: 175 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 176 177 void addBlock(BasicBlock *BB) { 178 addBlock(BB, /* Remember = */ false); 179 } 180 181 void addAndRememberBlock(BasicBlock *BB) { 182 addBlock(BB, /* Remember = */ true); 183 } 184 185 /// Get the nearest common dominator of all the BBs added via addBlock() and 186 /// addAndRememberBlock(). 187 BasicBlock *result() { return Result; } 188 189 /// Is the BB returned by getResult() one of the blocks we added to the set 190 /// with addAndRememberBlock()? 191 bool resultIsRememberedBlock() { return ResultIsRemembered; } 192 }; 193 194 /// Transforms the control flow graph on one single entry/exit region 195 /// at a time. 196 /// 197 /// After the transform all "If"/"Then"/"Else" style control flow looks like 198 /// this: 199 /// 200 /// \verbatim 201 /// 1 202 /// || 203 /// | | 204 /// 2 | 205 /// | / 206 /// |/ 207 /// 3 208 /// || Where: 209 /// | | 1 = "If" block, calculates the condition 210 /// 4 | 2 = "Then" subregion, runs if the condition is true 211 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 212 /// |/ 4 = "Else" optional subregion, runs if the condition is false 213 /// 5 5 = "End" block, also rejoins the control flow 214 /// \endverbatim 215 /// 216 /// Control flow is expressed as a branch where the true exit goes into the 217 /// "Then"/"Else" region, while the false exit skips the region 218 /// The condition for the optional "Else" region is expressed as a PHI node. 219 /// The incoming values of the PHI node are true for the "If" edge and false 220 /// for the "Then" edge. 221 /// 222 /// Additionally to that even complicated loops look like this: 223 /// 224 /// \verbatim 225 /// 1 226 /// || 227 /// | | 228 /// 2 ^ Where: 229 /// | / 1 = "Entry" block 230 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 231 /// 3 3 = "Flow" block, with back edge to entry block 232 /// | 233 /// \endverbatim 234 /// 235 /// The back edge of the "Flow" block is always on the false side of the branch 236 /// while the true side continues the general flow. So the loop condition 237 /// consist of a network of PHI nodes where the true incoming values expresses 238 /// breaks and the false values expresses continue states. 239 240 class StructurizeCFG { 241 Type *Boolean; 242 ConstantInt *BoolTrue; 243 ConstantInt *BoolFalse; 244 UndefValue *BoolUndef; 245 246 Function *Func; 247 Region *ParentRegion; 248 249 LegacyDivergenceAnalysis *DA = nullptr; 250 DominatorTree *DT; 251 252 SmallVector<RegionNode *, 8> Order; 253 BBSet Visited; 254 255 SmallVector<WeakVH, 8> AffectedPhis; 256 BBPhiMap DeletedPhis; 257 BB2BBVecMap AddedPhis; 258 259 PredMap Predicates; 260 BranchVector Conditions; 261 262 BB2BBMap Loops; 263 PredMap LoopPreds; 264 BranchVector LoopConds; 265 266 RegionNode *PrevNode; 267 268 void orderNodes(); 269 270 void reorderNodes(); 271 272 void analyzeLoops(RegionNode *N); 273 274 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 275 276 void gatherPredicates(RegionNode *N); 277 278 void collectInfos(); 279 280 void insertConditions(bool Loops); 281 282 void simplifyConditions(); 283 284 void delPhiValues(BasicBlock *From, BasicBlock *To); 285 286 void addPhiValues(BasicBlock *From, BasicBlock *To); 287 288 void setPhiValues(); 289 290 void simplifyAffectedPhis(); 291 292 void killTerminator(BasicBlock *BB); 293 294 void changeExit(RegionNode *Node, BasicBlock *NewExit, 295 bool IncludeDominator); 296 297 BasicBlock *getNextFlow(BasicBlock *Dominator); 298 299 BasicBlock *needPrefix(bool NeedEmpty); 300 301 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 302 303 void setPrevNode(BasicBlock *BB); 304 305 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 306 307 bool isPredictableTrue(RegionNode *Node); 308 309 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 310 311 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 312 313 void createFlow(); 314 315 void rebuildSSA(); 316 317 public: 318 void init(Region *R); 319 bool run(Region *R, DominatorTree *DT); 320 bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA); 321 }; 322 323 class StructurizeCFGLegacyPass : public RegionPass { 324 bool SkipUniformRegions; 325 326 public: 327 static char ID; 328 329 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 330 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 331 if (ForceSkipUniformRegions.getNumOccurrences()) 332 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 333 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 334 } 335 336 bool runOnRegion(Region *R, RGPassManager &RGM) override { 337 StructurizeCFG SCFG; 338 SCFG.init(R); 339 if (SkipUniformRegions) { 340 LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); 341 if (SCFG.makeUniformRegion(R, DA)) 342 return false; 343 } 344 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 345 return SCFG.run(R, DT); 346 } 347 348 StringRef getPassName() const override { return "Structurize control flow"; } 349 350 void getAnalysisUsage(AnalysisUsage &AU) const override { 351 if (SkipUniformRegions) 352 AU.addRequired<LegacyDivergenceAnalysis>(); 353 AU.addRequiredID(LowerSwitchID); 354 AU.addRequired<DominatorTreeWrapperPass>(); 355 356 AU.addPreserved<DominatorTreeWrapperPass>(); 357 RegionPass::getAnalysisUsage(AU); 358 } 359 }; 360 361 } // end anonymous namespace 362 363 char StructurizeCFGLegacyPass::ID = 0; 364 365 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 366 "Structurize the CFG", false, false) 367 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 368 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) 369 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 370 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 371 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 372 "Structurize the CFG", false, false) 373 374 /// Build up the general order of nodes, by performing a topological sort of the 375 /// parent region's nodes, while ensuring that there is no outer cycle node 376 /// between any two inner cycle nodes. 377 void StructurizeCFG::orderNodes() { 378 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 379 GraphTraits<Region *>::nodes_end(ParentRegion))); 380 if (Order.empty()) 381 return; 382 383 SmallDenseSet<RegionNode *> Nodes; 384 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 385 386 // A list of range indices of SCCs in Order, to be processed. 387 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 388 unsigned I = 0, E = Order.size(); 389 while (true) { 390 // Run through all the SCCs in the subgraph starting with Entry. 391 for (auto SCCI = 392 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 393 EntryNode); 394 !SCCI.isAtEnd(); ++SCCI) { 395 auto &SCC = *SCCI; 396 397 // An SCC up to the size of 2, can be reduced to an entry (the last node), 398 // and a possible additional node. Therefore, it is already in order, and 399 // there is no need to add it to the work-list. 400 unsigned Size = SCC.size(); 401 if (Size > 2) 402 WorkList.emplace_back(I, I + Size); 403 404 // Add the SCC nodes to the Order array. 405 for (auto &N : SCC) { 406 assert(I < E && "SCC size mismatch!"); 407 Order[I++] = N.first; 408 } 409 } 410 assert(I == E && "SCC size mismatch!"); 411 412 // If there are no more SCCs to order, then we are done. 413 if (WorkList.empty()) 414 break; 415 416 std::tie(I, E) = WorkList.pop_back_val(); 417 418 // Collect the set of nodes in the SCC's subgraph. These are only the 419 // possible child nodes; we do not add the entry (last node) otherwise we 420 // will have the same exact SCC all over again. 421 Nodes.clear(); 422 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 423 424 // Update the entry node. 425 EntryNode.first = Order[E - 1]; 426 EntryNode.second = &Nodes; 427 } 428 } 429 430 /// Change the node ordering to decrease the range of live values, especially 431 /// the values that capture the control flow path for branches. We do this 432 /// by moving blocks with a single predecessor and successor to appear after 433 /// predecessor. The motivation is to move some loop exit blocks into a loop. 434 /// In cases where a loop has a large number of exit blocks, this reduces the 435 /// amount of values needed across the loop boundary. 436 void StructurizeCFG::reorderNodes() { 437 SmallVector<RegionNode *, 8> NewOrder; 438 DenseMap<BasicBlock *, unsigned> MoveTo; 439 BitVector Moved(Order.size()); 440 441 // The benefits of reordering nodes occurs for large regions. 442 if (Order.size() <= ReorderNodeSize) 443 return; 444 445 // The algorithm works with two passes over Order. The first pass identifies 446 // the blocks to move and the position to move them to. The second pass 447 // creates the new order based upon this information. We move blocks with 448 // a single predecessor and successor. If there are multiple candidates then 449 // maintain the original order. 450 BBSet Seen; 451 for (int I = Order.size() - 1; I >= 0; --I) { 452 auto *BB = Order[I]->getEntry(); 453 Seen.insert(BB); 454 auto *Pred = BB->getSinglePredecessor(); 455 auto *Succ = BB->getSingleSuccessor(); 456 // Consider only those basic blocks that have a predecessor in Order and a 457 // successor that exits the region. The region may contain subregions that 458 // have been structurized and are not included in Order. 459 if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && 460 !MoveTo.count(Pred)) { 461 MoveTo[Pred] = I; 462 Moved.set(I); 463 } 464 } 465 466 // If no blocks have been moved then the original order is good. 467 if (!Moved.count()) 468 return; 469 470 for (size_t I = 0, E = Order.size(); I < E; ++I) { 471 auto *BB = Order[I]->getEntry(); 472 if (MoveTo.count(BB)) 473 NewOrder.push_back(Order[MoveTo[BB]]); 474 if (!Moved[I]) 475 NewOrder.push_back(Order[I]); 476 } 477 478 Order.assign(NewOrder); 479 } 480 481 /// Determine the end of the loops 482 void StructurizeCFG::analyzeLoops(RegionNode *N) { 483 if (N->isSubRegion()) { 484 // Test for exit as back edge 485 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 486 if (Visited.count(Exit)) 487 Loops[Exit] = N->getEntry(); 488 489 } else { 490 // Test for successors as back edge 491 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 492 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 493 494 for (BasicBlock *Succ : Term->successors()) 495 if (Visited.count(Succ)) 496 Loops[Succ] = BB; 497 } 498 } 499 500 /// Build the condition for one edge 501 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 502 bool Invert) { 503 Value *Cond = Invert ? BoolFalse : BoolTrue; 504 if (Term->isConditional()) { 505 Cond = Term->getCondition(); 506 507 if (Idx != (unsigned)Invert) 508 Cond = invertCondition(Cond); 509 } 510 return Cond; 511 } 512 513 /// Analyze the predecessors of each block and build up predicates 514 void StructurizeCFG::gatherPredicates(RegionNode *N) { 515 RegionInfo *RI = ParentRegion->getRegionInfo(); 516 BasicBlock *BB = N->getEntry(); 517 BBPredicates &Pred = Predicates[BB]; 518 BBPredicates &LPred = LoopPreds[BB]; 519 520 for (BasicBlock *P : predecessors(BB)) { 521 // Ignore it if it's a branch from outside into our region entry 522 if (!ParentRegion->contains(P)) 523 continue; 524 525 Region *R = RI->getRegionFor(P); 526 if (R == ParentRegion) { 527 // It's a top level block in our region 528 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 529 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 530 BasicBlock *Succ = Term->getSuccessor(i); 531 if (Succ != BB) 532 continue; 533 534 if (Visited.count(P)) { 535 // Normal forward edge 536 if (Term->isConditional()) { 537 // Try to treat it like an ELSE block 538 BasicBlock *Other = Term->getSuccessor(!i); 539 if (Visited.count(Other) && !Loops.count(Other) && 540 !Pred.count(Other) && !Pred.count(P)) { 541 542 Pred[Other] = BoolFalse; 543 Pred[P] = BoolTrue; 544 continue; 545 } 546 } 547 Pred[P] = buildCondition(Term, i, false); 548 } else { 549 // Back edge 550 LPred[P] = buildCondition(Term, i, true); 551 } 552 } 553 } else { 554 // It's an exit from a sub region 555 while (R->getParent() != ParentRegion) 556 R = R->getParent(); 557 558 // Edge from inside a subregion to its entry, ignore it 559 if (*R == *N) 560 continue; 561 562 BasicBlock *Entry = R->getEntry(); 563 if (Visited.count(Entry)) 564 Pred[Entry] = BoolTrue; 565 else 566 LPred[Entry] = BoolFalse; 567 } 568 } 569 } 570 571 /// Collect various loop and predicate infos 572 void StructurizeCFG::collectInfos() { 573 // Reset predicate 574 Predicates.clear(); 575 576 // and loop infos 577 Loops.clear(); 578 LoopPreds.clear(); 579 580 // Reset the visited nodes 581 Visited.clear(); 582 583 for (RegionNode *RN : reverse(Order)) { 584 LLVM_DEBUG(dbgs() << "Visiting: " 585 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 586 << RN->getEntry()->getName() << "\n"); 587 588 // Analyze all the conditions leading to a node 589 gatherPredicates(RN); 590 591 // Remember that we've seen this node 592 Visited.insert(RN->getEntry()); 593 594 // Find the last back edges 595 analyzeLoops(RN); 596 } 597 } 598 599 /// Insert the missing branch conditions 600 void StructurizeCFG::insertConditions(bool Loops) { 601 BranchVector &Conds = Loops ? LoopConds : Conditions; 602 Value *Default = Loops ? BoolTrue : BoolFalse; 603 SSAUpdater PhiInserter; 604 605 for (BranchInst *Term : Conds) { 606 assert(Term->isConditional()); 607 608 BasicBlock *Parent = Term->getParent(); 609 BasicBlock *SuccTrue = Term->getSuccessor(0); 610 BasicBlock *SuccFalse = Term->getSuccessor(1); 611 612 PhiInserter.Initialize(Boolean, ""); 613 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 614 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 615 616 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 617 618 NearestCommonDominator Dominator(DT); 619 Dominator.addBlock(Parent); 620 621 Value *ParentValue = nullptr; 622 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 623 BasicBlock *BB = BBAndPred.first; 624 Value *Pred = BBAndPred.second; 625 626 if (BB == Parent) { 627 ParentValue = Pred; 628 break; 629 } 630 PhiInserter.AddAvailableValue(BB, Pred); 631 Dominator.addAndRememberBlock(BB); 632 } 633 634 if (ParentValue) { 635 Term->setCondition(ParentValue); 636 } else { 637 if (!Dominator.resultIsRememberedBlock()) 638 PhiInserter.AddAvailableValue(Dominator.result(), Default); 639 640 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 641 } 642 } 643 } 644 645 /// Simplify any inverted conditions that were built by buildConditions. 646 void StructurizeCFG::simplifyConditions() { 647 SmallVector<Instruction *> InstToErase; 648 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 649 auto &Preds = I.second; 650 for (auto &J : Preds) { 651 auto &Cond = J.second; 652 Instruction *Inverted; 653 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 654 !Cond->use_empty()) { 655 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 656 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 657 Cond->replaceAllUsesWith(InvertedCmp); 658 InstToErase.push_back(cast<Instruction>(Cond)); 659 } 660 } 661 } 662 } 663 for (auto *I : InstToErase) 664 I->eraseFromParent(); 665 } 666 667 /// Remove all PHI values coming from "From" into "To" and remember 668 /// them in DeletedPhis 669 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 670 PhiMap &Map = DeletedPhis[To]; 671 for (PHINode &Phi : To->phis()) { 672 bool Recorded = false; 673 while (Phi.getBasicBlockIndex(From) != -1) { 674 Value *Deleted = Phi.removeIncomingValue(From, false); 675 Map[&Phi].push_back(std::make_pair(From, Deleted)); 676 if (!Recorded) { 677 AffectedPhis.push_back(&Phi); 678 Recorded = true; 679 } 680 } 681 } 682 } 683 684 /// Add a dummy PHI value as soon as we knew the new predecessor 685 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 686 for (PHINode &Phi : To->phis()) { 687 Value *Undef = UndefValue::get(Phi.getType()); 688 Phi.addIncoming(Undef, From); 689 } 690 AddedPhis[To].push_back(From); 691 } 692 693 /// Add the real PHI value as soon as everything is set up 694 void StructurizeCFG::setPhiValues() { 695 SmallVector<PHINode *, 8> InsertedPhis; 696 SSAUpdater Updater(&InsertedPhis); 697 for (const auto &AddedPhi : AddedPhis) { 698 BasicBlock *To = AddedPhi.first; 699 const BBVector &From = AddedPhi.second; 700 701 if (!DeletedPhis.count(To)) 702 continue; 703 704 PhiMap &Map = DeletedPhis[To]; 705 for (const auto &PI : Map) { 706 PHINode *Phi = PI.first; 707 Value *Undef = UndefValue::get(Phi->getType()); 708 Updater.Initialize(Phi->getType(), ""); 709 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 710 Updater.AddAvailableValue(To, Undef); 711 712 NearestCommonDominator Dominator(DT); 713 Dominator.addBlock(To); 714 for (const auto &VI : PI.second) { 715 Updater.AddAvailableValue(VI.first, VI.second); 716 Dominator.addAndRememberBlock(VI.first); 717 } 718 719 if (!Dominator.resultIsRememberedBlock()) 720 Updater.AddAvailableValue(Dominator.result(), Undef); 721 722 for (BasicBlock *FI : From) 723 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 724 AffectedPhis.push_back(Phi); 725 } 726 727 DeletedPhis.erase(To); 728 } 729 assert(DeletedPhis.empty()); 730 731 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 732 } 733 734 void StructurizeCFG::simplifyAffectedPhis() { 735 bool Changed; 736 do { 737 Changed = false; 738 SimplifyQuery Q(Func->getParent()->getDataLayout()); 739 Q.DT = DT; 740 for (WeakVH VH : AffectedPhis) { 741 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 742 if (auto NewValue = simplifyInstruction(Phi, Q)) { 743 Phi->replaceAllUsesWith(NewValue); 744 Phi->eraseFromParent(); 745 Changed = true; 746 } 747 } 748 } 749 } while (Changed); 750 } 751 752 /// Remove phi values from all successors and then remove the terminator. 753 void StructurizeCFG::killTerminator(BasicBlock *BB) { 754 Instruction *Term = BB->getTerminator(); 755 if (!Term) 756 return; 757 758 for (BasicBlock *Succ : successors(BB)) 759 delPhiValues(BB, Succ); 760 761 if (DA) 762 DA->removeValue(Term); 763 Term->eraseFromParent(); 764 } 765 766 /// Let node exit(s) point to NewExit 767 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 768 bool IncludeDominator) { 769 if (Node->isSubRegion()) { 770 Region *SubRegion = Node->getNodeAs<Region>(); 771 BasicBlock *OldExit = SubRegion->getExit(); 772 BasicBlock *Dominator = nullptr; 773 774 // Find all the edges from the sub region to the exit. 775 // We use make_early_inc_range here because we modify BB's terminator. 776 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 777 if (!SubRegion->contains(BB)) 778 continue; 779 780 // Modify the edges to point to the new exit 781 delPhiValues(BB, OldExit); 782 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 783 addPhiValues(BB, NewExit); 784 785 // Find the new dominator (if requested) 786 if (IncludeDominator) { 787 if (!Dominator) 788 Dominator = BB; 789 else 790 Dominator = DT->findNearestCommonDominator(Dominator, BB); 791 } 792 } 793 794 // Change the dominator (if requested) 795 if (Dominator) 796 DT->changeImmediateDominator(NewExit, Dominator); 797 798 // Update the region info 799 SubRegion->replaceExit(NewExit); 800 } else { 801 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 802 killTerminator(BB); 803 BranchInst::Create(NewExit, BB); 804 addPhiValues(BB, NewExit); 805 if (IncludeDominator) 806 DT->changeImmediateDominator(NewExit, BB); 807 } 808 } 809 810 /// Create a new flow node and update dominator tree and region info 811 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 812 LLVMContext &Context = Func->getContext(); 813 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 814 Order.back()->getEntry(); 815 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 816 Func, Insert); 817 DT->addNewBlock(Flow, Dominator); 818 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 819 return Flow; 820 } 821 822 /// Create a new or reuse the previous node as flow node 823 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 824 BasicBlock *Entry = PrevNode->getEntry(); 825 826 if (!PrevNode->isSubRegion()) { 827 killTerminator(Entry); 828 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 829 return Entry; 830 } 831 832 // create a new flow node 833 BasicBlock *Flow = getNextFlow(Entry); 834 835 // and wire it up 836 changeExit(PrevNode, Flow, true); 837 PrevNode = ParentRegion->getBBNode(Flow); 838 return Flow; 839 } 840 841 /// Returns the region exit if possible, otherwise just a new flow node 842 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 843 bool ExitUseAllowed) { 844 if (!Order.empty() || !ExitUseAllowed) 845 return getNextFlow(Flow); 846 847 BasicBlock *Exit = ParentRegion->getExit(); 848 DT->changeImmediateDominator(Exit, Flow); 849 addPhiValues(Flow, Exit); 850 return Exit; 851 } 852 853 /// Set the previous node 854 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 855 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 856 : nullptr; 857 } 858 859 /// Does BB dominate all the predicates of Node? 860 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 861 BBPredicates &Preds = Predicates[Node->getEntry()]; 862 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 863 return DT->dominates(BB, Pred.first); 864 }); 865 } 866 867 /// Can we predict that this node will always be called? 868 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 869 BBPredicates &Preds = Predicates[Node->getEntry()]; 870 bool Dominated = false; 871 872 // Regionentry is always true 873 if (!PrevNode) 874 return true; 875 876 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 877 BasicBlock *BB = Pred.first; 878 Value *V = Pred.second; 879 880 if (V != BoolTrue) 881 return false; 882 883 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 884 Dominated = true; 885 } 886 887 // TODO: The dominator check is too strict 888 return Dominated; 889 } 890 891 /// Take one node from the order vector and wire it up 892 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 893 BasicBlock *LoopEnd) { 894 RegionNode *Node = Order.pop_back_val(); 895 Visited.insert(Node->getEntry()); 896 897 if (isPredictableTrue(Node)) { 898 // Just a linear flow 899 if (PrevNode) { 900 changeExit(PrevNode, Node->getEntry(), true); 901 } 902 PrevNode = Node; 903 } else { 904 // Insert extra prefix node (or reuse last one) 905 BasicBlock *Flow = needPrefix(false); 906 907 // Insert extra postfix node (or use exit instead) 908 BasicBlock *Entry = Node->getEntry(); 909 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 910 911 // let it point to entry and next block 912 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 913 addPhiValues(Flow, Entry); 914 DT->changeImmediateDominator(Entry, Flow); 915 916 PrevNode = Node; 917 while (!Order.empty() && !Visited.count(LoopEnd) && 918 dominatesPredicates(Entry, Order.back())) { 919 handleLoops(false, LoopEnd); 920 } 921 922 changeExit(PrevNode, Next, false); 923 setPrevNode(Next); 924 } 925 } 926 927 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 928 BasicBlock *LoopEnd) { 929 RegionNode *Node = Order.back(); 930 BasicBlock *LoopStart = Node->getEntry(); 931 932 if (!Loops.count(LoopStart)) { 933 wireFlow(ExitUseAllowed, LoopEnd); 934 return; 935 } 936 937 if (!isPredictableTrue(Node)) 938 LoopStart = needPrefix(true); 939 940 LoopEnd = Loops[Node->getEntry()]; 941 wireFlow(false, LoopEnd); 942 while (!Visited.count(LoopEnd)) { 943 handleLoops(false, LoopEnd); 944 } 945 946 // If the start of the loop is the entry block, we can't branch to it so 947 // insert a new dummy entry block. 948 Function *LoopFunc = LoopStart->getParent(); 949 if (LoopStart == &LoopFunc->getEntryBlock()) { 950 LoopStart->setName("entry.orig"); 951 952 BasicBlock *NewEntry = 953 BasicBlock::Create(LoopStart->getContext(), 954 "entry", 955 LoopFunc, 956 LoopStart); 957 BranchInst::Create(LoopStart, NewEntry); 958 DT->setNewRoot(NewEntry); 959 } 960 961 // Create an extra loop end node 962 LoopEnd = needPrefix(false); 963 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 964 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 965 BoolUndef, LoopEnd)); 966 addPhiValues(LoopEnd, LoopStart); 967 setPrevNode(Next); 968 } 969 970 /// After this function control flow looks like it should be, but 971 /// branches and PHI nodes only have undefined conditions. 972 void StructurizeCFG::createFlow() { 973 BasicBlock *Exit = ParentRegion->getExit(); 974 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 975 976 AffectedPhis.clear(); 977 DeletedPhis.clear(); 978 AddedPhis.clear(); 979 Conditions.clear(); 980 LoopConds.clear(); 981 982 PrevNode = nullptr; 983 Visited.clear(); 984 985 while (!Order.empty()) { 986 handleLoops(EntryDominatesExit, nullptr); 987 } 988 989 if (PrevNode) 990 changeExit(PrevNode, Exit, EntryDominatesExit); 991 else 992 assert(EntryDominatesExit); 993 } 994 995 /// Handle a rare case where the disintegrated nodes instructions 996 /// no longer dominate all their uses. Not sure if this is really necessary 997 void StructurizeCFG::rebuildSSA() { 998 SSAUpdater Updater; 999 for (BasicBlock *BB : ParentRegion->blocks()) 1000 for (Instruction &I : *BB) { 1001 bool Initialized = false; 1002 // We may modify the use list as we iterate over it, so we use 1003 // make_early_inc_range. 1004 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1005 Instruction *User = cast<Instruction>(U.getUser()); 1006 if (User->getParent() == BB) { 1007 continue; 1008 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1009 if (UserPN->getIncomingBlock(U) == BB) 1010 continue; 1011 } 1012 1013 if (DT->dominates(&I, User)) 1014 continue; 1015 1016 if (!Initialized) { 1017 Value *Undef = UndefValue::get(I.getType()); 1018 Updater.Initialize(I.getType(), ""); 1019 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1020 Updater.AddAvailableValue(BB, &I); 1021 Initialized = true; 1022 } 1023 Updater.RewriteUseAfterInsertions(U); 1024 } 1025 } 1026 } 1027 1028 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1029 const LegacyDivergenceAnalysis &DA) { 1030 // Bool for if all sub-regions are uniform. 1031 bool SubRegionsAreUniform = true; 1032 // Count of how many direct children are conditional. 1033 unsigned ConditionalDirectChildren = 0; 1034 1035 for (auto E : R->elements()) { 1036 if (!E->isSubRegion()) { 1037 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1038 if (!Br || !Br->isConditional()) 1039 continue; 1040 1041 if (!DA.isUniform(Br)) 1042 return false; 1043 1044 // One of our direct children is conditional. 1045 ConditionalDirectChildren++; 1046 1047 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1048 << " has uniform terminator\n"); 1049 } else { 1050 // Explicitly refuse to treat regions as uniform if they have non-uniform 1051 // subregions. We cannot rely on DivergenceAnalysis for branches in 1052 // subregions because those branches may have been removed and re-created, 1053 // so we look for our metadata instead. 1054 // 1055 // Warning: It would be nice to treat regions as uniform based only on 1056 // their direct child basic blocks' terminators, regardless of whether 1057 // subregions are uniform or not. However, this requires a very careful 1058 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1059 for (auto BB : E->getNodeAs<Region>()->blocks()) { 1060 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1061 if (!Br || !Br->isConditional()) 1062 continue; 1063 1064 if (!Br->getMetadata(UniformMDKindID)) { 1065 // Early exit if we cannot have relaxed uniform regions. 1066 if (!RelaxedUniformRegions) 1067 return false; 1068 1069 SubRegionsAreUniform = false; 1070 break; 1071 } 1072 } 1073 } 1074 } 1075 1076 // Our region is uniform if: 1077 // 1. All conditional branches that are direct children are uniform (checked 1078 // above). 1079 // 2. And either: 1080 // a. All sub-regions are uniform. 1081 // b. There is one or less conditional branches among the direct children. 1082 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1083 } 1084 1085 void StructurizeCFG::init(Region *R) { 1086 LLVMContext &Context = R->getEntry()->getContext(); 1087 1088 Boolean = Type::getInt1Ty(Context); 1089 BoolTrue = ConstantInt::getTrue(Context); 1090 BoolFalse = ConstantInt::getFalse(Context); 1091 BoolUndef = UndefValue::get(Boolean); 1092 1093 this->DA = nullptr; 1094 } 1095 1096 bool StructurizeCFG::makeUniformRegion(Region *R, 1097 LegacyDivergenceAnalysis *DA) { 1098 if (R->isTopLevelRegion()) 1099 return false; 1100 1101 this->DA = DA; 1102 // TODO: We could probably be smarter here with how we handle sub-regions. 1103 // We currently rely on the fact that metadata is set by earlier invocations 1104 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1105 // but we shouldn't rely on metadata for correctness! 1106 unsigned UniformMDKindID = 1107 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1108 1109 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 1110 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1111 << '\n'); 1112 1113 // Mark all direct child block terminators as having been treated as 1114 // uniform. To account for a possible future in which non-uniform 1115 // sub-regions are treated more cleverly, indirect children are not 1116 // marked as uniform. 1117 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1118 for (RegionNode *E : R->elements()) { 1119 if (E->isSubRegion()) 1120 continue; 1121 1122 if (Instruction *Term = E->getEntry()->getTerminator()) 1123 Term->setMetadata(UniformMDKindID, MD); 1124 } 1125 1126 return true; 1127 } 1128 return false; 1129 } 1130 1131 /// Run the transformation for each region found 1132 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1133 if (R->isTopLevelRegion()) 1134 return false; 1135 1136 this->DT = DT; 1137 1138 Func = R->getEntry()->getParent(); 1139 ParentRegion = R; 1140 1141 orderNodes(); 1142 reorderNodes(); 1143 collectInfos(); 1144 createFlow(); 1145 insertConditions(false); 1146 insertConditions(true); 1147 setPhiValues(); 1148 simplifyConditions(); 1149 simplifyAffectedPhis(); 1150 rebuildSSA(); 1151 1152 // Cleanup 1153 Order.clear(); 1154 Visited.clear(); 1155 DeletedPhis.clear(); 1156 AddedPhis.clear(); 1157 Predicates.clear(); 1158 Conditions.clear(); 1159 Loops.clear(); 1160 LoopPreds.clear(); 1161 LoopConds.clear(); 1162 1163 return true; 1164 } 1165 1166 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1167 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1168 } 1169 1170 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1171 Regions.push_back(&R); 1172 for (const auto &E : R) 1173 addRegionIntoQueue(*E, Regions); 1174 } 1175 1176 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1177 FunctionAnalysisManager &AM) { 1178 1179 bool Changed = false; 1180 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1181 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1182 std::vector<Region *> Regions; 1183 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1184 while (!Regions.empty()) { 1185 Region *R = Regions.back(); 1186 StructurizeCFG SCFG; 1187 SCFG.init(R); 1188 Changed |= SCFG.run(R, DT); 1189 Regions.pop_back(); 1190 } 1191 if (!Changed) 1192 return PreservedAnalyses::all(); 1193 PreservedAnalyses PA; 1194 PA.preserve<DominatorTreeAnalysis>(); 1195 return PA; 1196 } 1197