1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
13 ///
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
17 ///
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
29 ///
30 ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31 ///     NumLoops * NumLoops)
32 ///
33 /// This pass is similar to what the Relooper [1] does. Both identify looping
34 /// code that requires multiple entries, and resolve it in a similar way (in
35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36 /// also that like the Relooper, we implement a "minimal" intervention: we only
37 /// use the "label" helper for the blocks we absolutely must and no others. We
38 /// also prioritize code size and do not duplicate code in order to resolve
39 /// irreducibility. The graph algorithms for finding loops and entries and so
40 /// forth are also similar to the Relooper. The main differences between this
41 /// pass and the Relooper are:
42 ///
43 ///  * We just care about irreducibility, so we just look at loops.
44 ///  * The Relooper emits structured control flow (with ifs etc.), while we
45 ///    emit a CFG.
46 ///
47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48 /// Proceedings of the ACM international conference companion on Object oriented
49 /// programming systems languages and applications companion (SPLASH '11). ACM,
50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51 /// http://doi.acm.org/10.1145/2048147.2048224
52 ///
53 //===----------------------------------------------------------------------===//
54 
55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56 #include "WebAssembly.h"
57 #include "WebAssemblySubtarget.h"
58 #include "llvm/CodeGen/MachineFunctionPass.h"
59 #include "llvm/CodeGen/MachineInstrBuilder.h"
60 #include "llvm/Support/Debug.h"
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64 
65 namespace {
66 
67 using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69 
getSortedEntries(const BlockSet & Entries)70 static BlockVector getSortedEntries(const BlockSet &Entries) {
71   BlockVector SortedEntries(Entries.begin(), Entries.end());
72   llvm::sort(SortedEntries,
73              [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
74                auto ANum = A->getNumber();
75                auto BNum = B->getNumber();
76                return ANum < BNum;
77              });
78   return SortedEntries;
79 }
80 
81 // Calculates reachability in a region. Ignores branches to blocks outside of
82 // the region, and ignores branches to the region entry (for the case where
83 // the region is the inner part of a loop).
84 class ReachabilityGraph {
85 public:
ReachabilityGraph(MachineBasicBlock * Entry,const BlockSet & Blocks)86   ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
87       : Entry(Entry), Blocks(Blocks) {
88 #ifndef NDEBUG
89     // The region must have a single entry.
90     for (auto *MBB : Blocks) {
91       if (MBB != Entry) {
92         for (auto *Pred : MBB->predecessors()) {
93           assert(inRegion(Pred));
94         }
95       }
96     }
97 #endif
98     calculate();
99   }
100 
canReach(MachineBasicBlock * From,MachineBasicBlock * To) const101   bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
102     assert(inRegion(From) && inRegion(To));
103     auto I = Reachable.find(From);
104     if (I == Reachable.end())
105       return false;
106     return I->second.count(To);
107   }
108 
109   // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110   // that can reach themselves.
getLoopers() const111   const BlockSet &getLoopers() const { return Loopers; }
112 
113   // Get all blocks that are loop entries.
getLoopEntries() const114   const BlockSet &getLoopEntries() const { return LoopEntries; }
115 
116   // Get all blocks that enter a particular loop from outside.
getLoopEnterers(MachineBasicBlock * LoopEntry) const117   const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
118     assert(inRegion(LoopEntry));
119     auto I = LoopEnterers.find(LoopEntry);
120     assert(I != LoopEnterers.end());
121     return I->second;
122   }
123 
124 private:
125   MachineBasicBlock *Entry;
126   const BlockSet &Blocks;
127 
128   BlockSet Loopers, LoopEntries;
129   DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130 
inRegion(MachineBasicBlock * MBB) const131   bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
132 
133   // Maps a block to all the other blocks it can reach.
134   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135 
calculate()136   void calculate() {
137     // Reachability computation work list. Contains pairs of recent additions
138     // (A, B) where we just added a link A => B.
139     using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
140     SmallVector<BlockPair, 4> WorkList;
141 
142     // Add all relevant direct branches.
143     for (auto *MBB : Blocks) {
144       for (auto *Succ : MBB->successors()) {
145         if (Succ != Entry && inRegion(Succ)) {
146           Reachable[MBB].insert(Succ);
147           WorkList.emplace_back(MBB, Succ);
148         }
149       }
150     }
151 
152     while (!WorkList.empty()) {
153       MachineBasicBlock *MBB, *Succ;
154       std::tie(MBB, Succ) = WorkList.pop_back_val();
155       assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156       if (MBB != Entry) {
157         // We recently added MBB => Succ, and that means we may have enabled
158         // Pred => MBB => Succ.
159         for (auto *Pred : MBB->predecessors()) {
160           if (Reachable[Pred].insert(Succ).second) {
161             WorkList.emplace_back(Pred, Succ);
162           }
163         }
164       }
165     }
166 
167     // Blocks that can return to themselves are in a loop.
168     for (auto *MBB : Blocks) {
169       if (canReach(MBB, MBB)) {
170         Loopers.insert(MBB);
171       }
172     }
173     assert(!Loopers.count(Entry));
174 
175     // Find the loop entries - loopers reachable from blocks not in that loop -
176     // and those outside blocks that reach them, the "loop enterers".
177     for (auto *Looper : Loopers) {
178       for (auto *Pred : Looper->predecessors()) {
179         // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180         // otherwise, it is a block that enters into the loop.
181         if (!canReach(Looper, Pred)) {
182           LoopEntries.insert(Looper);
183           LoopEnterers[Looper].insert(Pred);
184         }
185       }
186     }
187   }
188 };
189 
190 // Finds the blocks in a single-entry loop, given the loop entry and the
191 // list of blocks that enter the loop.
192 class LoopBlocks {
193 public:
LoopBlocks(MachineBasicBlock * Entry,const BlockSet & Enterers)194   LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
195       : Entry(Entry), Enterers(Enterers) {
196     calculate();
197   }
198 
getBlocks()199   BlockSet &getBlocks() { return Blocks; }
200 
201 private:
202   MachineBasicBlock *Entry;
203   const BlockSet &Enterers;
204 
205   BlockSet Blocks;
206 
calculate()207   void calculate() {
208     // Going backwards from the loop entry, if we ignore the blocks entering
209     // from outside, we will traverse all the blocks in the loop.
210     BlockVector WorkList;
211     BlockSet AddedToWorkList;
212     Blocks.insert(Entry);
213     for (auto *Pred : Entry->predecessors()) {
214       if (!Enterers.count(Pred)) {
215         WorkList.push_back(Pred);
216         AddedToWorkList.insert(Pred);
217       }
218     }
219 
220     while (!WorkList.empty()) {
221       auto *MBB = WorkList.pop_back_val();
222       assert(!Enterers.count(MBB));
223       if (Blocks.insert(MBB).second) {
224         for (auto *Pred : MBB->predecessors()) {
225           if (AddedToWorkList.insert(Pred).second)
226             WorkList.push_back(Pred);
227         }
228       }
229     }
230   }
231 };
232 
233 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
getPassName() const234   StringRef getPassName() const override {
235     return "WebAssembly Fix Irreducible Control Flow";
236   }
237 
238   bool runOnMachineFunction(MachineFunction &MF) override;
239 
240   bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241                      MachineFunction &MF);
242 
243   void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244                            MachineFunction &MF, const ReachabilityGraph &Graph);
245 
246 public:
247   static char ID; // Pass identification, replacement for typeid
WebAssemblyFixIrreducibleControlFlow()248   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
249 };
250 
processRegion(MachineBasicBlock * Entry,BlockSet & Blocks,MachineFunction & MF)251 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252     MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253   bool Changed = false;
254   // Remove irreducibility before processing child loops, which may take
255   // multiple iterations.
256   while (true) {
257     ReachabilityGraph Graph(Entry, Blocks);
258 
259     bool FoundIrreducibility = false;
260 
261     for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
262       // Find mutual entries - all entries which can reach this one, and
263       // are reached by it (that always includes LoopEntry itself). All mutual
264       // entries must be in the same loop, so if we have more than one, then we
265       // have irreducible control flow.
266       //
267       // (Note that we need to sort the entries here, as otherwise the order can
268       // matter: being mutual is a symmetric relationship, and each set of
269       // mutuals will be handled properly no matter which we see first. However,
270       // there can be multiple disjoint sets of mutuals, and which we process
271       // first changes the output.)
272       //
273       // Note that irreducibility may involve inner loops, e.g. imagine A
274       // starts one loop, and it has B inside it which starts an inner loop.
275       // If we add a branch from all the way on the outside to B, then in a
276       // sense B is no longer an "inner" loop, semantically speaking. We will
277       // fix that irreducibility by adding a block that dispatches to either
278       // either A or B, so B will no longer be an inner loop in our output.
279       // (A fancier approach might try to keep it as such.)
280       //
281       // Note that we still need to recurse into inner loops later, to handle
282       // the case where the irreducibility is entirely nested - we would not
283       // be able to identify that at this point, since the enclosing loop is
284       // a group of blocks all of whom can reach each other. (We'll see the
285       // irreducibility after removing branches to the top of that enclosing
286       // loop.)
287       BlockSet MutualLoopEntries;
288       MutualLoopEntries.insert(LoopEntry);
289       for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290         if (OtherLoopEntry != LoopEntry &&
291             Graph.canReach(LoopEntry, OtherLoopEntry) &&
292             Graph.canReach(OtherLoopEntry, LoopEntry)) {
293           MutualLoopEntries.insert(OtherLoopEntry);
294         }
295       }
296 
297       if (MutualLoopEntries.size() > 1) {
298         makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
299         FoundIrreducibility = true;
300         Changed = true;
301         break;
302       }
303     }
304     // Only go on to actually process the inner loops when we are done
305     // removing irreducible control flow and changing the graph. Modifying
306     // the graph as we go is possible, and that might let us avoid looking at
307     // the already-fixed loops again if we are careful, but all that is
308     // complex and bug-prone. Since irreducible loops are rare, just starting
309     // another iteration is best.
310     if (FoundIrreducibility) {
311       continue;
312     }
313 
314     for (auto *LoopEntry : Graph.getLoopEntries()) {
315       LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316       // Each of these calls to processRegion may change the graph, but are
317       // guaranteed not to interfere with each other. The only changes we make
318       // to the graph are to add blocks on the way to a loop entry. As the
319       // loops are disjoint, that means we may only alter branches that exit
320       // another loop, which are ignored when recursing into that other loop
321       // anyhow.
322       if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
323         Changed = true;
324       }
325     }
326 
327     return Changed;
328   }
329 }
330 
331 // Given a set of entries to a single loop, create a single entry for that
332 // loop by creating a dispatch block for them, routing control flow using
333 // a helper variable. Also updates Blocks with any new blocks created, so
334 // that we properly track all the blocks in the region. But this does not update
335 // ReachabilityGraph; this will be updated in the caller of this function as
336 // needed.
makeSingleEntryLoop(BlockSet & Entries,BlockSet & Blocks,MachineFunction & MF,const ReachabilityGraph & Graph)337 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338     BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
339     const ReachabilityGraph &Graph) {
340   assert(Entries.size() >= 2);
341 
342   // Sort the entries to ensure a deterministic build.
343   BlockVector SortedEntries = getSortedEntries(Entries);
344 
345 #ifndef NDEBUG
346   for (auto *Block : SortedEntries)
347     assert(Block->getNumber() != -1);
348   if (SortedEntries.size() > 1) {
349     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
350          ++I) {
351       auto ANum = (*I)->getNumber();
352       auto BNum = (*(std::next(I)))->getNumber();
353       assert(ANum != BNum);
354     }
355   }
356 #endif
357 
358   // Create a dispatch block which will contain a jump table to the entries.
359   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
360   MF.insert(MF.end(), Dispatch);
361   Blocks.insert(Dispatch);
362 
363   // Add the jump table.
364   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
365   MachineInstrBuilder MIB =
366       BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
367 
368   // Add the register which will be used to tell the jump table which block to
369   // jump to.
370   MachineRegisterInfo &MRI = MF.getRegInfo();
371   Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
372   MIB.addReg(Reg);
373 
374   // Compute the indices in the superheader, one for each bad block, and
375   // add them as successors.
376   DenseMap<MachineBasicBlock *, unsigned> Indices;
377   for (auto *Entry : SortedEntries) {
378     auto Pair = Indices.insert(std::make_pair(Entry, 0));
379     assert(Pair.second);
380 
381     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
382     Pair.first->second = Index;
383 
384     MIB.addMBB(Entry);
385     Dispatch->addSuccessor(Entry);
386   }
387 
388   // Rewrite the problematic successors for every block that wants to reach
389   // the bad blocks. For simplicity, we just introduce a new block for every
390   // edge we need to rewrite. (Fancier things are possible.)
391 
392   BlockVector AllPreds;
393   for (auto *Entry : SortedEntries) {
394     for (auto *Pred : Entry->predecessors()) {
395       if (Pred != Dispatch) {
396         AllPreds.push_back(Pred);
397       }
398     }
399   }
400 
401   // This set stores predecessors within this loop.
402   DenseSet<MachineBasicBlock *> InLoop;
403   for (auto *Pred : AllPreds) {
404     for (auto *Entry : Pred->successors()) {
405       if (!Entries.count(Entry))
406         continue;
407       if (Graph.canReach(Entry, Pred)) {
408         InLoop.insert(Pred);
409         break;
410       }
411     }
412   }
413 
414   // Record if each entry has a layout predecessor. This map stores
415   // <<loop entry, Predecessor is within the loop?>, layout predecessor>
416   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
417       EntryToLayoutPred;
418   for (auto *Pred : AllPreds) {
419     bool PredInLoop = InLoop.count(Pred);
420     for (auto *Entry : Pred->successors())
421       if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
422         EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
423   }
424 
425   // We need to create at most two routing blocks per entry: one for
426   // predecessors outside the loop and one for predecessors inside the loop.
427   // This map stores
428   // <<loop entry, Predecessor is within the loop?>, routing block>
429   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
430       Map;
431   for (auto *Pred : AllPreds) {
432     bool PredInLoop = InLoop.count(Pred);
433     for (auto *Entry : Pred->successors()) {
434       if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
435         continue;
436       // If there exists a layout predecessor of this entry and this predecessor
437       // is not that, we rather create a routing block after that layout
438       // predecessor to save a branch.
439       if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
440         if (OtherPred != Pred)
441           continue;
442 
443       // This is a successor we need to rewrite.
444       MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
445       MF.insert(Pred->isLayoutSuccessor(Entry)
446                     ? MachineFunction::iterator(Entry)
447                     : MF.end(),
448                 Routing);
449       Blocks.insert(Routing);
450 
451       // Set the jump table's register of the index of the block we wish to
452       // jump to, and jump to the jump table.
453       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
454           .addImm(Indices[Entry]);
455       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
456       Routing->addSuccessor(Dispatch);
457       Map[{Entry, PredInLoop}] = Routing;
458     }
459   }
460 
461   for (auto *Pred : AllPreds) {
462     bool PredInLoop = InLoop.count(Pred);
463     // Remap the terminator operands and the successor list.
464     for (MachineInstr &Term : Pred->terminators())
465       for (auto &Op : Term.explicit_uses())
466         if (Op.isMBB() && Indices.count(Op.getMBB()))
467           Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
468 
469     for (auto *Succ : Pred->successors()) {
470       if (!Entries.count(Succ))
471         continue;
472       auto *Routing = Map[{Succ, PredInLoop}];
473       Pred->replaceSuccessor(Succ, Routing);
474     }
475   }
476 
477   // Create a fake default label, because br_table requires one.
478   MIB.addMBB(MIB.getInstr()
479                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
480                  .getMBB());
481 }
482 
483 } // end anonymous namespace
484 
485 char WebAssemblyFixIrreducibleControlFlow::ID = 0;
486 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
487                 "Removes irreducible control flow", false, false)
488 
createWebAssemblyFixIrreducibleControlFlow()489 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
490   return new WebAssemblyFixIrreducibleControlFlow();
491 }
492 
493 // Test whether the given register has an ARGUMENT def.
hasArgumentDef(unsigned Reg,const MachineRegisterInfo & MRI)494 static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495   for (const auto &Def : MRI.def_instructions(Reg))
496     if (WebAssembly::isArgument(Def.getOpcode()))
497       return true;
498   return false;
499 }
500 
501 // Add a register definition with IMPLICIT_DEFs for every register to cover for
502 // register uses that don't have defs in every possible path.
503 // TODO: This is fairly heavy-handed; find a better approach.
addImplicitDefs(MachineFunction & MF)504 static void addImplicitDefs(MachineFunction &MF) {
505   const MachineRegisterInfo &MRI = MF.getRegInfo();
506   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507   MachineBasicBlock &Entry = *MF.begin();
508   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
509     Register Reg = Register::index2VirtReg(I);
510 
511     // Skip unused registers.
512     if (MRI.use_nodbg_empty(Reg))
513       continue;
514 
515     // Skip registers that have an ARGUMENT definition.
516     if (hasArgumentDef(Reg, MRI))
517       continue;
518 
519     BuildMI(Entry, Entry.begin(), DebugLoc(),
520             TII.get(WebAssembly::IMPLICIT_DEF), Reg);
521   }
522 
523   // Move ARGUMENT_* instructions to the top of the entry block, so that their
524   // liveness reflects the fact that these really are live-in values.
525   for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
526     if (WebAssembly::isArgument(MI.getOpcode())) {
527       MI.removeFromParent();
528       Entry.insert(Entry.begin(), &MI);
529     }
530   }
531 }
532 
runOnMachineFunction(MachineFunction & MF)533 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534     MachineFunction &MF) {
535   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536                        "********** Function: "
537                     << MF.getName() << '\n');
538 
539   // Start the recursive process on the entire function body.
540   BlockSet AllBlocks;
541   for (auto &MBB : MF) {
542     AllBlocks.insert(&MBB);
543   }
544 
545   if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546     // We rewrote part of the function; recompute relevant things.
547     MF.RenumberBlocks();
548     // Now we've inserted dispatch blocks, some register uses can have incoming
549     // paths without a def. For example, before this pass register %a was
550     // defined in BB1 and used in BB2, and there was only one path from BB1 and
551     // BB2. But if this pass inserts a dispatch block having multiple
552     // predecessors between the two BBs, now there are paths to BB2 without
553     // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554     // IMPLICIT_DEFs to all regs is one simple way to fix it.
555     addImplicitDefs(MF);
556     return true;
557   }
558 
559   return false;
560 }
561