1 //===- DependenceGraphBuilder.cpp ------------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements common steps of the build algorithm for construction
9 // of dependence graphs such as DDG and PDG.
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/Analysis/DependenceGraphBuilder.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/EnumeratedArray.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/DDG.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "dgb"
22 
23 STATISTIC(TotalGraphs, "Number of dependence graphs created.");
24 STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
25 STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
26 STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
27 STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
28 STATISTIC(TotalConfusedEdges,
29           "Number of confused memory dependencies between two nodes.");
30 STATISTIC(TotalEdgeReversals,
31           "Number of times the source and sink of dependence was reversed to "
32           "expose cycles in the graph.");
33 
34 using InstructionListType = SmallVector<Instruction *, 2>;
35 
36 //===--------------------------------------------------------------------===//
37 // AbstractDependenceGraphBuilder implementation
38 //===--------------------------------------------------------------------===//
39 
40 template <class G>
41 void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
42   // The BBList is expected to be in program order.
43   size_t NextOrdinal = 1;
44   for (auto *BB : BBList)
45     for (auto &I : *BB)
46       InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
47 }
48 
49 template <class G>
50 void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
51   ++TotalGraphs;
52   assert(IMap.empty() && "Expected empty instruction map at start");
53   for (BasicBlock *BB : BBList)
54     for (Instruction &I : *BB) {
55       auto &NewNode = createFineGrainedNode(I);
56       IMap.insert(std::make_pair(&I, &NewNode));
57       NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
58       ++TotalFineGrainedNodes;
59     }
60 }
61 
62 template <class G>
63 void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
64   // Create a root node that connects to every connected component of the graph.
65   // This is done to allow graph iterators to visit all the disjoint components
66   // of the graph, in a single walk.
67   //
68   // This algorithm works by going through each node of the graph and for each
69   // node N, do a DFS starting from N. A rooted edge is established between the
70   // root node and N (if N is not yet visited). All the nodes reachable from N
71   // are marked as visited and are skipped in the DFS of subsequent nodes.
72   //
73   // Note: This algorithm tries to limit the number of edges out of the root
74   // node to some extent, but there may be redundant edges created depending on
75   // the iteration order. For example for a graph {A -> B}, an edge from the
76   // root node is added to both nodes if B is visited before A. While it does
77   // not result in minimal number of edges, this approach saves compile-time
78   // while keeping the number of edges in check.
79   auto &RootNode = createRootNode();
80   df_iterator_default_set<const NodeType *, 4> Visited;
81   for (auto *N : Graph) {
82     if (*N == RootNode)
83       continue;
84     for (auto I : depth_first_ext(N, Visited))
85       if (I == N)
86         createRootedEdge(RootNode, *N);
87   }
88 }
89 
90 template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
91   if (!shouldCreatePiBlocks())
92     return;
93 
94   LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
95 
96   // The overall algorithm is as follows:
97   // 1. Identify SCCs and for each SCC create a pi-block node containing all
98   //    the nodes in that SCC.
99   // 2. Identify incoming edges incident to the nodes inside of the SCC and
100   //    reconnect them to the pi-block node.
101   // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
102   //    outside of it and reconnect them so that the edges are coming out of the
103   //    SCC node instead.
104 
105   // Adding nodes as we iterate through the SCCs cause the SCC
106   // iterators to get invalidated. To prevent this invalidation, we first
107   // collect a list of nodes that are part of an SCC, and then iterate over
108   // those lists to create the pi-block nodes. Each element of the list is a
109   // list of nodes in an SCC. Note: trivial SCCs containing a single node are
110   // ignored.
111   SmallVector<NodeListType, 4> ListOfSCCs;
112   for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
113     if (SCC.size() > 1)
114       ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
115   }
116 
117   for (NodeListType &NL : ListOfSCCs) {
118     LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
119                       << " nodes in it.\n");
120 
121     // SCC iterator may put the nodes in an order that's different from the
122     // program order. To preserve original program order, we sort the list of
123     // nodes based on ordinal numbers computed earlier.
124     llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
125       return getOrdinal(*LHS) < getOrdinal(*RHS);
126     });
127 
128     NodeType &PiNode = createPiBlock(NL);
129     ++TotalPiBlockNodes;
130 
131     // Build a set to speed up the lookup for edges whose targets
132     // are inside the SCC.
133     SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
134 
135     // We have the set of nodes in the SCC. We go through the set of nodes
136     // that are outside of the SCC and look for edges that cross the two sets.
137     for (NodeType *N : Graph) {
138 
139       // Skip the SCC node and all the nodes inside of it.
140       if (*N == PiNode || NodesInSCC.count(N))
141         continue;
142 
143       for (NodeType *SCCNode : NL) {
144 
145         enum Direction {
146           Incoming,      // Incoming edges to the SCC
147           Outgoing,      // Edges going ot of the SCC
148           DirectionCount // To make the enum usable as an array index.
149         };
150 
151         // Use these flags to help us avoid creating redundant edges. If there
152         // are more than one edges from an outside node to inside nodes, we only
153         // keep one edge from that node to the pi-block node. Similarly, if
154         // there are more than one edges from inside nodes to an outside node,
155         // we only keep one edge from the pi-block node to the outside node.
156         // There is a flag defined for each direction (incoming vs outgoing) and
157         // for each type of edge supported, using a two-dimensional boolean
158         // array.
159         using EdgeKind = typename EdgeType::EdgeKind;
160         EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
161             false, false};
162 
163         auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
164                                        const EdgeKind K) {
165           switch (K) {
166           case EdgeKind::RegisterDefUse:
167             createDefUseEdge(Src, Dst);
168             break;
169           case EdgeKind::MemoryDependence:
170             createMemoryEdge(Src, Dst);
171             break;
172           case EdgeKind::Rooted:
173             createRootedEdge(Src, Dst);
174             break;
175           default:
176             llvm_unreachable("Unsupported type of edge.");
177           }
178         };
179 
180         auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
181                                   const Direction Dir) {
182           if (!Src->hasEdgeTo(*Dst))
183             return;
184           LLVM_DEBUG(dbgs()
185                      << "reconnecting("
186                      << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
187                      << ":\nSrc:" << *Src << "\nDst:" << *Dst
188                      << "\nNew:" << *New << "\n");
189           assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
190                  "Invalid direction.");
191 
192           SmallVector<EdgeType *, 10> EL;
193           Src->findEdgesTo(*Dst, EL);
194           for (EdgeType *OldEdge : EL) {
195             EdgeKind Kind = OldEdge->getKind();
196             if (!EdgeAlreadyCreated[Dir][Kind]) {
197               if (Dir == Direction::Incoming) {
198                 createEdgeOfKind(*Src, *New, Kind);
199                 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
200               } else if (Dir == Direction::Outgoing) {
201                 createEdgeOfKind(*New, *Dst, Kind);
202                 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
203               }
204               EdgeAlreadyCreated[Dir][Kind] = true;
205             }
206             Src->removeEdge(*OldEdge);
207             destroyEdge(*OldEdge);
208             LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
209           }
210         };
211 
212         // Process incoming edges incident to the pi-block node.
213         reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
214 
215         // Process edges that are coming out of the pi-block node.
216         reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
217       }
218     }
219   }
220 
221   // Ordinal maps are no longer needed.
222   InstOrdinalMap.clear();
223   NodeOrdinalMap.clear();
224 
225   LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
226 }
227 
228 template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
229   for (NodeType *N : Graph) {
230     InstructionListType SrcIList;
231     N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
232 
233     // Use a set to mark the targets that we link to N, so we don't add
234     // duplicate def-use edges when more than one instruction in a target node
235     // use results of instructions that are contained in N.
236     SmallPtrSet<NodeType *, 4> VisitedTargets;
237 
238     for (Instruction *II : SrcIList) {
239       for (User *U : II->users()) {
240         Instruction *UI = dyn_cast<Instruction>(U);
241         if (!UI)
242           continue;
243         NodeType *DstNode = nullptr;
244         if (IMap.find(UI) != IMap.end())
245           DstNode = IMap.find(UI)->second;
246 
247         // In the case of loops, the scope of the subgraph is all the
248         // basic blocks (and instructions within them) belonging to the loop. We
249         // simply ignore all the edges coming from (or going into) instructions
250         // or basic blocks outside of this range.
251         if (!DstNode) {
252           LLVM_DEBUG(
253               dbgs()
254               << "skipped def-use edge since the sink" << *UI
255               << " is outside the range of instructions being considered.\n");
256           continue;
257         }
258 
259         // Self dependencies are ignored because they are redundant and
260         // uninteresting.
261         if (DstNode == N) {
262           LLVM_DEBUG(dbgs()
263                      << "skipped def-use edge since the sink and the source ("
264                      << N << ") are the same.\n");
265           continue;
266         }
267 
268         if (VisitedTargets.insert(DstNode).second) {
269           createDefUseEdge(*N, *DstNode);
270           ++TotalDefUseEdges;
271         }
272       }
273     }
274   }
275 }
276 
277 template <class G>
278 void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
279   using DGIterator = typename G::iterator;
280   auto isMemoryAccess = [](const Instruction *I) {
281     return I->mayReadOrWriteMemory();
282   };
283   for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
284     InstructionListType SrcIList;
285     (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
286     if (SrcIList.empty())
287       continue;
288 
289     for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
290       if (**SrcIt == **DstIt)
291         continue;
292       InstructionListType DstIList;
293       (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
294       if (DstIList.empty())
295         continue;
296       bool ForwardEdgeCreated = false;
297       bool BackwardEdgeCreated = false;
298       for (Instruction *ISrc : SrcIList) {
299         for (Instruction *IDst : DstIList) {
300           auto D = DI.depends(ISrc, IDst, true);
301           if (!D)
302             continue;
303 
304           // If we have a dependence with its left-most non-'=' direction
305           // being '>' we need to reverse the direction of the edge, because
306           // the source of the dependence cannot occur after the sink. For
307           // confused dependencies, we will create edges in both directions to
308           // represent the possibility of a cycle.
309 
310           auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
311             if (!ForwardEdgeCreated) {
312               createMemoryEdge(Src, Dst);
313               ++TotalMemoryEdges;
314             }
315             if (!BackwardEdgeCreated) {
316               createMemoryEdge(Dst, Src);
317               ++TotalMemoryEdges;
318             }
319             ForwardEdgeCreated = BackwardEdgeCreated = true;
320             ++TotalConfusedEdges;
321           };
322 
323           auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
324             if (!ForwardEdgeCreated) {
325               createMemoryEdge(Src, Dst);
326               ++TotalMemoryEdges;
327             }
328             ForwardEdgeCreated = true;
329           };
330 
331           auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
332             if (!BackwardEdgeCreated) {
333               createMemoryEdge(Dst, Src);
334               ++TotalMemoryEdges;
335             }
336             BackwardEdgeCreated = true;
337           };
338 
339           if (D->isConfused())
340             createConfusedEdges(**SrcIt, **DstIt);
341           else if (D->isOrdered() && !D->isLoopIndependent()) {
342             bool ReversedEdge = false;
343             for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
344               if (D->getDirection(Level) == Dependence::DVEntry::EQ)
345                 continue;
346               else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
347                 createBackwardEdge(**SrcIt, **DstIt);
348                 ReversedEdge = true;
349                 ++TotalEdgeReversals;
350                 break;
351               } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
352                 break;
353               else {
354                 createConfusedEdges(**SrcIt, **DstIt);
355                 break;
356               }
357             }
358             if (!ReversedEdge)
359               createForwardEdge(**SrcIt, **DstIt);
360           } else
361             createForwardEdge(**SrcIt, **DstIt);
362 
363           // Avoid creating duplicate edges.
364           if (ForwardEdgeCreated && BackwardEdgeCreated)
365             break;
366         }
367 
368         // If we've created edges in both directions, there is no more
369         // unique edge that we can create between these two nodes, so we
370         // can exit early.
371         if (ForwardEdgeCreated && BackwardEdgeCreated)
372           break;
373       }
374     }
375   }
376 }
377 
378 template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
379   if (!shouldSimplify())
380     return;
381   LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
382 
383   // This algorithm works by first collecting a set of candidate nodes that have
384   // an out-degree of one (in terms of def-use edges), and then ignoring those
385   // whose targets have an in-degree more than one. Each node in the resulting
386   // set can then be merged with its corresponding target and put back into the
387   // worklist until no further merge candidates are available.
388   SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
389 
390   // A mapping between nodes and their in-degree. To save space, this map
391   // only contains nodes that are targets of nodes in the CandidateSourceNodes.
392   DenseMap<NodeType *, unsigned> TargetInDegreeMap;
393 
394   for (NodeType *N : Graph) {
395     if (N->getEdges().size() != 1)
396       continue;
397     EdgeType &Edge = N->back();
398     if (!Edge.isDefUse())
399       continue;
400     CandidateSourceNodes.insert(N);
401 
402     // Insert an element into the in-degree map and initialize to zero. The
403     // count will get updated in the next step.
404     TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
405   }
406 
407   LLVM_DEBUG({
408     dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
409            << "\nNode with single outgoing def-use edge:\n";
410     for (NodeType *N : CandidateSourceNodes) {
411       dbgs() << N << "\n";
412     }
413   });
414 
415   for (NodeType *N : Graph) {
416     for (EdgeType *E : *N) {
417       NodeType *Tgt = &E->getTargetNode();
418       auto TgtIT = TargetInDegreeMap.find(Tgt);
419       if (TgtIT != TargetInDegreeMap.end())
420         ++(TgtIT->second);
421     }
422   }
423 
424   LLVM_DEBUG({
425     dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
426            << "\nContent of in-degree map:\n";
427     for (auto &I : TargetInDegreeMap) {
428       dbgs() << I.first << " --> " << I.second << "\n";
429     }
430   });
431 
432   SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
433                                        CandidateSourceNodes.end());
434   while (!Worklist.empty()) {
435     NodeType &Src = *Worklist.pop_back_val();
436     // As nodes get merged, we need to skip any node that has been removed from
437     // the candidate set (see below).
438     if (!CandidateSourceNodes.erase(&Src))
439       continue;
440 
441     assert(Src.getEdges().size() == 1 &&
442            "Expected a single edge from the candidate src node.");
443     NodeType &Tgt = Src.back().getTargetNode();
444     assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
445            "Expected target to be in the in-degree map.");
446 
447     if (TargetInDegreeMap[&Tgt] != 1)
448       continue;
449 
450     if (!areNodesMergeable(Src, Tgt))
451       continue;
452 
453     // Do not merge if there is also an edge from target to src (immediate
454     // cycle).
455     if (Tgt.hasEdgeTo(Src))
456       continue;
457 
458     LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
459 
460     mergeNodes(Src, Tgt);
461 
462     // If the target node is in the candidate set itself, we need to put the
463     // src node back into the worklist again so it gives the target a chance
464     // to get merged into it. For example if we have:
465     // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
466     // then after merging (a) and (b) together, we need to put (a,b) back in
467     // the worklist so that (c) can get merged in as well resulting in
468     // {(a,b,c) -> d}
469     // We also need to remove the old target (b), from the worklist. We first
470     // remove it from the candidate set here, and skip any item from the
471     // worklist that is not in the set.
472     if (CandidateSourceNodes.erase(&Tgt)) {
473       Worklist.push_back(&Src);
474       CandidateSourceNodes.insert(&Src);
475       LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
476     }
477   }
478   LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
479 }
480 
481 template <class G>
482 void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
483 
484   // If we don't create pi-blocks, then we may not have a DAG.
485   if (!shouldCreatePiBlocks())
486     return;
487 
488   SmallVector<NodeType *, 64> NodesInPO;
489   using NodeKind = typename NodeType::NodeKind;
490   for (NodeType *N : post_order(&Graph)) {
491     if (N->getKind() == NodeKind::PiBlock) {
492       // Put members of the pi-block right after the pi-block itself, for
493       // convenience.
494       const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
495       NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
496                        PiBlockMembers.end());
497     }
498     NodesInPO.push_back(N);
499   }
500 
501   size_t OldSize = Graph.Nodes.size();
502   Graph.Nodes.clear();
503   for (NodeType *N : reverse(NodesInPO))
504     Graph.Nodes.push_back(N);
505   if (Graph.Nodes.size() != OldSize)
506     assert(false &&
507            "Expected the number of nodes to stay the same after the sort");
508 }
509 
510 template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
511 template class llvm::DependenceGraphInfo<DDGNode>;
512