1 //===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the template classes ExplodedNode and ExplodedGraph,
10 //  which represent a path-sensitive, intra-procedural "exploded graph."
11 //  See "Precise interprocedural dataflow analysis via graph reachability"
12 //  by Reps, Horwitz, and Sagiv
13 //  (http://portal.acm.org/citation.cfm?id=199462) for the definition of an
14 //  exploded graph.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
19 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
20 
21 #include "clang/Analysis/AnalysisDeclContext.h"
22 #include "clang/Analysis/ProgramPoint.h"
23 #include "clang/Analysis/Support/BumpVector.h"
24 #include "clang/Basic/LLVM.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DepthFirstIterator.h"
31 #include "llvm/ADT/FoldingSet.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SetVector.h"
35 #include "llvm/ADT/iterator_range.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/Compiler.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <memory>
41 #include <optional>
42 #include <utility>
43 #include <vector>
44 
45 namespace clang {
46 
47 class CFG;
48 class Decl;
49 class Expr;
50 class ParentMap;
51 class Stmt;
52 
53 namespace ento {
54 
55 class ExplodedGraph;
56 
57 //===----------------------------------------------------------------------===//
58 // ExplodedGraph "implementation" classes.  These classes are not typed to
59 // contain a specific kind of state.  Typed-specialized versions are defined
60 // on top of these classes.
61 //===----------------------------------------------------------------------===//
62 
63 // ExplodedNode is not constified all over the engine because we need to add
64 // successors to it at any time after creating it.
65 
66 class ExplodedNode : public llvm::FoldingSetNode {
67   friend class BranchNodeBuilder;
68   friend class CoreEngine;
69   friend class EndOfFunctionNodeBuilder;
70   friend class ExplodedGraph;
71   friend class IndirectGotoNodeBuilder;
72   friend class NodeBuilder;
73   friend class SwitchNodeBuilder;
74 
75   /// Efficiently stores a list of ExplodedNodes, or an optional flag.
76   ///
77   /// NodeGroup provides opaque storage for a list of ExplodedNodes, optimizing
78   /// for the case when there is only one node in the group. This is a fairly
79   /// common case in an ExplodedGraph, where most nodes have only one
80   /// predecessor and many have only one successor. It can also be used to
81   /// store a flag rather than a node list, which ExplodedNode uses to mark
82   /// whether a node is a sink. If the flag is set, the group is implicitly
83   /// empty and no nodes may be added.
84   class NodeGroup {
85     // Conceptually a discriminated union. If the low bit is set, the node is
86     // a sink. If the low bit is not set, the pointer refers to the storage
87     // for the nodes in the group.
88     // This is not a PointerIntPair in order to keep the storage type opaque.
89     uintptr_t P;
90 
91   public:
92     NodeGroup(bool Flag = false) : P(Flag) {
93       assert(getFlag() == Flag);
94     }
95 
96     ExplodedNode * const *begin() const;
97 
98     ExplodedNode * const *end() const;
99 
100     unsigned size() const;
101 
102     bool empty() const { return P == 0 || getFlag() != 0; }
103 
104     /// Adds a node to the list.
105     ///
106     /// The group must not have been created with its flag set.
107     void addNode(ExplodedNode *N, ExplodedGraph &G);
108 
109     /// Replaces the single node in this group with a new node.
110     ///
111     /// Note that this should only be used when you know the group was not
112     /// created with its flag set, and that the group is empty or contains
113     /// only a single node.
114     void replaceNode(ExplodedNode *node);
115 
116     /// Returns whether this group was created with its flag set.
117     bool getFlag() const {
118       return (P & 1);
119     }
120   };
121 
122   /// Location - The program location (within a function body) associated
123   ///  with this node.
124   const ProgramPoint Location;
125 
126   /// State - The state associated with this node.
127   ProgramStateRef State;
128 
129   /// Preds - The predecessors of this node.
130   NodeGroup Preds;
131 
132   /// Succs - The successors of this node.
133   NodeGroup Succs;
134 
135   int64_t Id;
136 
137 public:
138   explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state,
139                         int64_t Id, bool IsSink)
140       : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) {
141     assert(isSink() == IsSink);
142   }
143 
144   /// getLocation - Returns the edge associated with the given node.
145   ProgramPoint getLocation() const { return Location; }
146 
147   const LocationContext *getLocationContext() const {
148     return getLocation().getLocationContext();
149   }
150 
151   const StackFrameContext *getStackFrame() const {
152     return getLocation().getStackFrame();
153   }
154 
155   const Decl &getCodeDecl() const { return *getLocationContext()->getDecl(); }
156 
157   CFG &getCFG() const { return *getLocationContext()->getCFG(); }
158 
159   const CFGBlock *getCFGBlock() const;
160 
161   const ParentMap &getParentMap() const {
162     return getLocationContext()->getParentMap();
163   }
164 
165   template <typename T> T &getAnalysis() const {
166     return *getLocationContext()->getAnalysis<T>();
167   }
168 
169   const ProgramStateRef &getState() const { return State; }
170 
171   template <typename T> std::optional<T> getLocationAs() const & {
172     return Location.getAs<T>();
173   }
174 
175   /// Get the value of an arbitrary expression at this node.
176   SVal getSVal(const Stmt *S) const {
177     return getState()->getSVal(S, getLocationContext());
178   }
179 
180   static void Profile(llvm::FoldingSetNodeID &ID,
181                       const ProgramPoint &Loc,
182                       const ProgramStateRef &state,
183                       bool IsSink) {
184     ID.Add(Loc);
185     ID.AddPointer(state.get());
186     ID.AddBoolean(IsSink);
187   }
188 
189   void Profile(llvm::FoldingSetNodeID& ID) const {
190     // We avoid copy constructors by not using accessors.
191     Profile(ID, Location, State, isSink());
192   }
193 
194   /// addPredeccessor - Adds a predecessor to the current node, and
195   ///  in tandem add this node as a successor of the other node.
196   void addPredecessor(ExplodedNode *V, ExplodedGraph &G);
197 
198   unsigned succ_size() const { return Succs.size(); }
199   unsigned pred_size() const { return Preds.size(); }
200   bool succ_empty() const { return Succs.empty(); }
201   bool pred_empty() const { return Preds.empty(); }
202 
203   bool isSink() const { return Succs.getFlag(); }
204 
205   bool hasSinglePred() const {
206     return (pred_size() == 1);
207   }
208 
209   ExplodedNode *getFirstPred() {
210     return pred_empty() ? nullptr : *(pred_begin());
211   }
212 
213   const ExplodedNode *getFirstPred() const {
214     return const_cast<ExplodedNode*>(this)->getFirstPred();
215   }
216 
217   ExplodedNode *getFirstSucc() {
218     return succ_empty() ? nullptr : *(succ_begin());
219   }
220 
221   const ExplodedNode *getFirstSucc() const {
222     return const_cast<ExplodedNode*>(this)->getFirstSucc();
223   }
224 
225   // Iterators over successor and predecessor vertices.
226   using succ_iterator = ExplodedNode * const *;
227   using succ_range = llvm::iterator_range<succ_iterator>;
228 
229   using const_succ_iterator = const ExplodedNode * const *;
230   using const_succ_range = llvm::iterator_range<const_succ_iterator>;
231 
232   using pred_iterator = ExplodedNode * const *;
233   using pred_range = llvm::iterator_range<pred_iterator>;
234 
235   using const_pred_iterator = const ExplodedNode * const *;
236   using const_pred_range = llvm::iterator_range<const_pred_iterator>;
237 
238   pred_iterator pred_begin() { return Preds.begin(); }
239   pred_iterator pred_end() { return Preds.end(); }
240   pred_range preds() { return {Preds.begin(), Preds.end()}; }
241 
242   const_pred_iterator pred_begin() const {
243     return const_cast<ExplodedNode*>(this)->pred_begin();
244   }
245   const_pred_iterator pred_end() const {
246     return const_cast<ExplodedNode*>(this)->pred_end();
247   }
248   const_pred_range preds() const { return {Preds.begin(), Preds.end()}; }
249 
250   succ_iterator succ_begin() { return Succs.begin(); }
251   succ_iterator succ_end() { return Succs.end(); }
252   succ_range succs() { return {Succs.begin(), Succs.end()}; }
253 
254   const_succ_iterator succ_begin() const {
255     return const_cast<ExplodedNode*>(this)->succ_begin();
256   }
257   const_succ_iterator succ_end() const {
258     return const_cast<ExplodedNode*>(this)->succ_end();
259   }
260   const_succ_range succs() const { return {Succs.begin(), Succs.end()}; }
261 
262   int64_t getID() const { return Id; }
263 
264   /// The node is trivial if it has only one successor, only one predecessor,
265   /// it's predecessor has only one successor,
266   /// and its program state is the same as the program state of the previous
267   /// node.
268   /// Trivial nodes may be skipped while printing exploded graph.
269   bool isTrivial() const;
270 
271   /// If the node's program point corresponds to a statement, retrieve that
272   /// statement. Useful for figuring out where to put a warning or a note.
273   /// If the statement belongs to a body-farmed definition,
274   /// retrieve the call site for that definition.
275   const Stmt *getStmtForDiagnostics() const;
276 
277   /// Find the next statement that was executed on this node's execution path.
278   /// Useful for explaining control flow that follows the current node.
279   /// If the statement belongs to a body-farmed definition, retrieve the
280   /// call site for that definition.
281   const Stmt *getNextStmtForDiagnostics() const;
282 
283   /// Find the statement that was executed immediately before this node.
284   /// Useful when the node corresponds to a CFG block entrance.
285   /// If the statement belongs to a body-farmed definition, retrieve the
286   /// call site for that definition.
287   const Stmt *getPreviousStmtForDiagnostics() const;
288 
289   /// Find the statement that was executed at or immediately before this node.
290   /// Useful when any nearby statement will do.
291   /// If the statement belongs to a body-farmed definition, retrieve the
292   /// call site for that definition.
293   const Stmt *getCurrentOrPreviousStmtForDiagnostics() const;
294 
295 private:
296   void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); }
297   void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); }
298 };
299 
300 using InterExplodedGraphMap =
301     llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>;
302 
303 class ExplodedGraph {
304 protected:
305   friend class CoreEngine;
306 
307   // Type definitions.
308   using NodeVector = std::vector<ExplodedNode *>;
309 
310   /// The roots of the simulation graph. Usually there will be only
311   /// one, but clients are free to establish multiple subgraphs within a single
312   /// SimulGraph. Moreover, these subgraphs can often merge when paths from
313   /// different roots reach the same state at the same program location.
314   NodeVector Roots;
315 
316   /// The nodes in the simulation graph which have been
317   /// specially marked as the endpoint of an abstract simulation path.
318   NodeVector EndNodes;
319 
320   /// Nodes - The nodes in the graph.
321   llvm::FoldingSet<ExplodedNode> Nodes;
322 
323   /// BVC - Allocator and context for allocating nodes and their predecessor
324   /// and successor groups.
325   BumpVectorContext BVC;
326 
327   /// NumNodes - The number of nodes in the graph.
328   int64_t NumNodes = 0;
329 
330   /// A list of recently allocated nodes that can potentially be recycled.
331   NodeVector ChangedNodes;
332 
333   /// A list of nodes that can be reused.
334   NodeVector FreeNodes;
335 
336   /// Determines how often nodes are reclaimed.
337   ///
338   /// If this is 0, nodes will never be reclaimed.
339   unsigned ReclaimNodeInterval = 0;
340 
341   /// Counter to determine when to reclaim nodes.
342   unsigned ReclaimCounter;
343 
344 public:
345   ExplodedGraph();
346   ~ExplodedGraph();
347 
348   /// Retrieve the node associated with a (Location,State) pair,
349   ///  where the 'Location' is a ProgramPoint in the CFG.  If no node for
350   ///  this pair exists, it is created. IsNew is set to true if
351   ///  the node was freshly created.
352   ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State,
353                         bool IsSink = false,
354                         bool* IsNew = nullptr);
355 
356   /// Create a node for a (Location, State) pair,
357   ///  but don't store it for deduplication later.  This
358   ///  is useful when copying an already completed
359   ///  ExplodedGraph for further processing.
360   ExplodedNode *createUncachedNode(const ProgramPoint &L,
361     ProgramStateRef State,
362     int64_t Id,
363     bool IsSink = false);
364 
365   std::unique_ptr<ExplodedGraph> MakeEmptyGraph() const {
366     return std::make_unique<ExplodedGraph>();
367   }
368 
369   /// addRoot - Add an untyped node to the set of roots.
370   ExplodedNode *addRoot(ExplodedNode *V) {
371     Roots.push_back(V);
372     return V;
373   }
374 
375   /// addEndOfPath - Add an untyped node to the set of EOP nodes.
376   ExplodedNode *addEndOfPath(ExplodedNode *V) {
377     EndNodes.push_back(V);
378     return V;
379   }
380 
381   unsigned num_roots() const { return Roots.size(); }
382   unsigned num_eops() const { return EndNodes.size(); }
383 
384   bool empty() const { return NumNodes == 0; }
385   unsigned size() const { return NumNodes; }
386 
387   void reserve(unsigned NodeCount) { Nodes.reserve(NodeCount); }
388 
389   // Iterators.
390   using NodeTy = ExplodedNode;
391   using AllNodesTy = llvm::FoldingSet<ExplodedNode>;
392   using roots_iterator = NodeVector::iterator;
393   using const_roots_iterator = NodeVector::const_iterator;
394   using eop_iterator = NodeVector::iterator;
395   using const_eop_iterator = NodeVector::const_iterator;
396   using node_iterator = AllNodesTy::iterator;
397   using const_node_iterator = AllNodesTy::const_iterator;
398 
399   llvm::iterator_range<node_iterator> nodes() { return Nodes; }
400 
401   llvm::iterator_range<const_node_iterator> nodes() const { return Nodes; }
402 
403   roots_iterator roots_begin() { return Roots.begin(); }
404 
405   roots_iterator roots_end() { return Roots.end(); }
406 
407   const_roots_iterator roots_begin() const { return Roots.begin(); }
408 
409   const_roots_iterator roots_end() const { return Roots.end(); }
410 
411   eop_iterator eop_begin() { return EndNodes.begin(); }
412 
413   eop_iterator eop_end() { return EndNodes.end(); }
414 
415   const_eop_iterator eop_begin() const { return EndNodes.begin(); }
416 
417   const_eop_iterator eop_end() const { return EndNodes.end(); }
418 
419   llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); }
420   BumpVectorContext &getNodeAllocator() { return BVC; }
421 
422   using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>;
423 
424   /// Creates a trimmed version of the graph that only contains paths leading
425   /// to the given nodes.
426   ///
427   /// \param Nodes The nodes which must appear in the final graph. Presumably
428   ///              these are end-of-path nodes (i.e. they have no successors).
429   /// \param[out] ForwardMap A optional map from nodes in this graph to nodes in
430   ///                        the returned graph.
431   /// \param[out] InverseMap An optional map from nodes in the returned graph to
432   ///                        nodes in this graph.
433   /// \returns The trimmed graph
434   std::unique_ptr<ExplodedGraph>
435   trim(ArrayRef<const NodeTy *> Nodes,
436        InterExplodedGraphMap *ForwardMap = nullptr,
437        InterExplodedGraphMap *InverseMap = nullptr) const;
438 
439   /// Enable tracking of recently allocated nodes for potential reclamation
440   /// when calling reclaimRecentlyAllocatedNodes().
441   void enableNodeReclamation(unsigned Interval) {
442     ReclaimCounter = ReclaimNodeInterval = Interval;
443   }
444 
445   /// Reclaim "uninteresting" nodes created since the last time this method
446   /// was called.
447   void reclaimRecentlyAllocatedNodes();
448 
449   /// Returns true if nodes for the given expression kind are always
450   ///        kept around.
451   static bool isInterestingLValueExpr(const Expr *Ex);
452 
453 private:
454   bool shouldCollect(const ExplodedNode *node);
455   void collectNode(ExplodedNode *node);
456 };
457 
458 class ExplodedNodeSet {
459   using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>;
460   ImplTy Impl;
461 
462 public:
463   ExplodedNodeSet(ExplodedNode *N) {
464     assert(N && !static_cast<ExplodedNode*>(N)->isSink());
465     Impl.insert(N);
466   }
467 
468   ExplodedNodeSet() = default;
469 
470   void Add(ExplodedNode *N) {
471     if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(N);
472   }
473 
474   using iterator = ImplTy::iterator;
475   using const_iterator = ImplTy::const_iterator;
476 
477   unsigned size() const { return Impl.size();  }
478   bool empty()    const { return Impl.empty(); }
479   bool erase(ExplodedNode *N) { return Impl.remove(N); }
480 
481   void clear() { Impl.clear(); }
482 
483   void insert(const ExplodedNodeSet &S) {
484     assert(&S != this);
485     if (empty())
486       Impl = S.Impl;
487     else
488       Impl.insert(S.begin(), S.end());
489   }
490 
491   iterator begin() { return Impl.begin(); }
492   iterator end() { return Impl.end(); }
493 
494   const_iterator begin() const { return Impl.begin(); }
495   const_iterator end() const { return Impl.end(); }
496 };
497 
498 } // namespace ento
499 
500 } // namespace clang
501 
502 // GraphTraits
503 
504 namespace llvm {
505   template <> struct GraphTraits<clang::ento::ExplodedGraph *> {
506     using GraphTy = clang::ento::ExplodedGraph *;
507     using NodeRef = clang::ento::ExplodedNode *;
508     using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator;
509     using nodes_iterator = llvm::df_iterator<GraphTy>;
510 
511     static NodeRef getEntryNode(const GraphTy G) {
512       return *G->roots_begin();
513     }
514 
515     static bool predecessorOfTrivial(NodeRef N) {
516       return N->succ_size() == 1 && N->getFirstSucc()->isTrivial();
517     }
518 
519     static ChildIteratorType child_begin(NodeRef N) {
520       if (predecessorOfTrivial(N))
521         return child_begin(*N->succ_begin());
522       return N->succ_begin();
523     }
524 
525     static ChildIteratorType child_end(NodeRef N) {
526       if (predecessorOfTrivial(N))
527         return child_end(N->getFirstSucc());
528       return N->succ_end();
529     }
530 
531     static nodes_iterator nodes_begin(const GraphTy G) {
532       return df_begin(G);
533     }
534 
535     static nodes_iterator nodes_end(const GraphTy G) {
536       return df_end(G);
537     }
538   };
539 } // namespace llvm
540 
541 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
542