1 //===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the template classes ExplodedNode and ExplodedGraph,
10 //  which represent a path-sensitive, intra-procedural "exploded graph."
11 //  See "Precise interprocedural dataflow analysis via graph reachability"
12 //  by Reps, Horwitz, and Sagiv
13 //  (http://portal.acm.org/citation.cfm?id=199462) for the definition of an
14 //  exploded graph.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
19 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
20 
21 #include "clang/Analysis/AnalysisDeclContext.h"
22 #include "clang/Analysis/ProgramPoint.h"
23 #include "clang/Analysis/Support/BumpVector.h"
24 #include "clang/Basic/LLVM.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DepthFirstIterator.h"
31 #include "llvm/ADT/FoldingSet.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/Compiler.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <memory>
41 #include <utility>
42 #include <vector>
43 
44 namespace clang {
45 
46 class CFG;
47 class Decl;
48 class Expr;
49 class ParentMap;
50 class Stmt;
51 
52 namespace ento {
53 
54 class ExplodedGraph;
55 
56 //===----------------------------------------------------------------------===//
57 // ExplodedGraph "implementation" classes.  These classes are not typed to
58 // contain a specific kind of state.  Typed-specialized versions are defined
59 // on top of these classes.
60 //===----------------------------------------------------------------------===//
61 
62 // ExplodedNode is not constified all over the engine because we need to add
63 // successors to it at any time after creating it.
64 
65 class ExplodedNode : public llvm::FoldingSetNode {
66   friend class BranchNodeBuilder;
67   friend class CoreEngine;
68   friend class EndOfFunctionNodeBuilder;
69   friend class ExplodedGraph;
70   friend class IndirectGotoNodeBuilder;
71   friend class NodeBuilder;
72   friend class SwitchNodeBuilder;
73 
74   /// Efficiently stores a list of ExplodedNodes, or an optional flag.
75   ///
76   /// NodeGroup provides opaque storage for a list of ExplodedNodes, optimizing
77   /// for the case when there is only one node in the group. This is a fairly
78   /// common case in an ExplodedGraph, where most nodes have only one
79   /// predecessor and many have only one successor. It can also be used to
80   /// store a flag rather than a node list, which ExplodedNode uses to mark
81   /// whether a node is a sink. If the flag is set, the group is implicitly
82   /// empty and no nodes may be added.
83   class NodeGroup {
84     // Conceptually a discriminated union. If the low bit is set, the node is
85     // a sink. If the low bit is not set, the pointer refers to the storage
86     // for the nodes in the group.
87     // This is not a PointerIntPair in order to keep the storage type opaque.
88     uintptr_t P;
89 
90   public:
91     NodeGroup(bool Flag = false) : P(Flag) {
92       assert(getFlag() == Flag);
93     }
94 
95     ExplodedNode * const *begin() const;
96 
97     ExplodedNode * const *end() const;
98 
99     unsigned size() const;
100 
101     bool empty() const { return P == 0 || getFlag() != 0; }
102 
103     /// Adds a node to the list.
104     ///
105     /// The group must not have been created with its flag set.
106     void addNode(ExplodedNode *N, ExplodedGraph &G);
107 
108     /// Replaces the single node in this group with a new node.
109     ///
110     /// Note that this should only be used when you know the group was not
111     /// created with its flag set, and that the group is empty or contains
112     /// only a single node.
113     void replaceNode(ExplodedNode *node);
114 
115     /// Returns whether this group was created with its flag set.
116     bool getFlag() const {
117       return (P & 1);
118     }
119   };
120 
121   /// Location - The program location (within a function body) associated
122   ///  with this node.
123   const ProgramPoint Location;
124 
125   /// State - The state associated with this node.
126   ProgramStateRef State;
127 
128   /// Preds - The predecessors of this node.
129   NodeGroup Preds;
130 
131   /// Succs - The successors of this node.
132   NodeGroup Succs;
133 
134   int64_t Id;
135 
136 public:
137   explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state,
138                         int64_t Id, bool IsSink)
139       : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) {
140     assert(isSink() == IsSink);
141   }
142 
143   /// getLocation - Returns the edge associated with the given node.
144   ProgramPoint getLocation() const { return Location; }
145 
146   const LocationContext *getLocationContext() const {
147     return getLocation().getLocationContext();
148   }
149 
150   const StackFrameContext *getStackFrame() const {
151     return getLocation().getStackFrame();
152   }
153 
154   const Decl &getCodeDecl() const { return *getLocationContext()->getDecl(); }
155 
156   CFG &getCFG() const { return *getLocationContext()->getCFG(); }
157 
158   const CFGBlock *getCFGBlock() const;
159 
160   const ParentMap &getParentMap() const {
161     return getLocationContext()->getParentMap();
162   }
163 
164   template <typename T>
165   T &getAnalysis() const {
166     return *getLocationContext()->getAnalysis<T>();
167   }
168 
169   const ProgramStateRef &getState() const { return State; }
170 
171   template <typename T>
172   Optional<T> getLocationAs() const LLVM_LVALUE_FUNCTION {
173     return Location.getAs<T>();
174   }
175 
176   /// Get the value of an arbitrary expression at this node.
177   SVal getSVal(const Stmt *S) const {
178     return getState()->getSVal(S, getLocationContext());
179   }
180 
181   static void Profile(llvm::FoldingSetNodeID &ID,
182                       const ProgramPoint &Loc,
183                       const ProgramStateRef &state,
184                       bool IsSink) {
185     ID.Add(Loc);
186     ID.AddPointer(state.get());
187     ID.AddBoolean(IsSink);
188   }
189 
190   void Profile(llvm::FoldingSetNodeID& ID) const {
191     // We avoid copy constructors by not using accessors.
192     Profile(ID, Location, State, isSink());
193   }
194 
195   /// addPredeccessor - Adds a predecessor to the current node, and
196   ///  in tandem add this node as a successor of the other node.
197   void addPredecessor(ExplodedNode *V, ExplodedGraph &G);
198 
199   unsigned succ_size() const { return Succs.size(); }
200   unsigned pred_size() const { return Preds.size(); }
201   bool succ_empty() const { return Succs.empty(); }
202   bool pred_empty() const { return Preds.empty(); }
203 
204   bool isSink() const { return Succs.getFlag(); }
205 
206   bool hasSinglePred() const {
207     return (pred_size() == 1);
208   }
209 
210   ExplodedNode *getFirstPred() {
211     return pred_empty() ? nullptr : *(pred_begin());
212   }
213 
214   const ExplodedNode *getFirstPred() const {
215     return const_cast<ExplodedNode*>(this)->getFirstPred();
216   }
217 
218   ExplodedNode *getFirstSucc() {
219     return succ_empty() ? nullptr : *(succ_begin());
220   }
221 
222   const ExplodedNode *getFirstSucc() const {
223     return const_cast<ExplodedNode*>(this)->getFirstSucc();
224   }
225 
226   // Iterators over successor and predecessor vertices.
227   using succ_iterator = ExplodedNode * const *;
228   using succ_range = llvm::iterator_range<succ_iterator>;
229 
230   using const_succ_iterator = const ExplodedNode * const *;
231   using const_succ_range = llvm::iterator_range<const_succ_iterator>;
232 
233   using pred_iterator = ExplodedNode * const *;
234   using pred_range = llvm::iterator_range<pred_iterator>;
235 
236   using const_pred_iterator = const ExplodedNode * const *;
237   using const_pred_range = llvm::iterator_range<const_pred_iterator>;
238 
239   pred_iterator pred_begin() { return Preds.begin(); }
240   pred_iterator pred_end() { return Preds.end(); }
241   pred_range preds() { return {Preds.begin(), Preds.end()}; }
242 
243   const_pred_iterator pred_begin() const {
244     return const_cast<ExplodedNode*>(this)->pred_begin();
245   }
246   const_pred_iterator pred_end() const {
247     return const_cast<ExplodedNode*>(this)->pred_end();
248   }
249   const_pred_range preds() const { return {Preds.begin(), Preds.end()}; }
250 
251   succ_iterator succ_begin() { return Succs.begin(); }
252   succ_iterator succ_end() { return Succs.end(); }
253   succ_range succs() { return {Succs.begin(), Succs.end()}; }
254 
255   const_succ_iterator succ_begin() const {
256     return const_cast<ExplodedNode*>(this)->succ_begin();
257   }
258   const_succ_iterator succ_end() const {
259     return const_cast<ExplodedNode*>(this)->succ_end();
260   }
261   const_succ_range succs() const { return {Succs.begin(), Succs.end()}; }
262 
263   int64_t getID() const { return Id; }
264 
265   /// The node is trivial if it has only one successor, only one predecessor,
266   /// it's predecessor has only one successor,
267   /// and its program state is the same as the program state of the previous
268   /// node.
269   /// Trivial nodes may be skipped while printing exploded graph.
270   bool isTrivial() const;
271 
272   /// If the node's program point corresponds to a statement, retrieve that
273   /// statement. Useful for figuring out where to put a warning or a note.
274   /// If the statement belongs to a body-farmed definition,
275   /// retrieve the call site for that definition.
276   const Stmt *getStmtForDiagnostics() const;
277 
278   /// Find the next statement that was executed on this node's execution path.
279   /// Useful for explaining control flow that follows the current node.
280   /// If the statement belongs to a body-farmed definition, retrieve the
281   /// call site for that definition.
282   const Stmt *getNextStmtForDiagnostics() const;
283 
284   /// Find the statement that was executed immediately before this node.
285   /// Useful when the node corresponds to a CFG block entrance.
286   /// If the statement belongs to a body-farmed definition, retrieve the
287   /// call site for that definition.
288   const Stmt *getPreviousStmtForDiagnostics() const;
289 
290   /// Find the statement that was executed at or immediately before this node.
291   /// Useful when any nearby statement will do.
292   /// If the statement belongs to a body-farmed definition, retrieve the
293   /// call site for that definition.
294   const Stmt *getCurrentOrPreviousStmtForDiagnostics() const;
295 
296 private:
297   void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); }
298   void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); }
299 };
300 
301 using InterExplodedGraphMap =
302     llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>;
303 
304 class ExplodedGraph {
305 protected:
306   friend class CoreEngine;
307 
308   // Type definitions.
309   using NodeVector = std::vector<ExplodedNode *>;
310 
311   /// The roots of the simulation graph. Usually there will be only
312   /// one, but clients are free to establish multiple subgraphs within a single
313   /// SimulGraph. Moreover, these subgraphs can often merge when paths from
314   /// different roots reach the same state at the same program location.
315   NodeVector Roots;
316 
317   /// The nodes in the simulation graph which have been
318   /// specially marked as the endpoint of an abstract simulation path.
319   NodeVector EndNodes;
320 
321   /// Nodes - The nodes in the graph.
322   llvm::FoldingSet<ExplodedNode> Nodes;
323 
324   /// BVC - Allocator and context for allocating nodes and their predecessor
325   /// and successor groups.
326   BumpVectorContext BVC;
327 
328   /// NumNodes - The number of nodes in the graph.
329   int64_t NumNodes = 0;
330 
331   /// A list of recently allocated nodes that can potentially be recycled.
332   NodeVector ChangedNodes;
333 
334   /// A list of nodes that can be reused.
335   NodeVector FreeNodes;
336 
337   /// Determines how often nodes are reclaimed.
338   ///
339   /// If this is 0, nodes will never be reclaimed.
340   unsigned ReclaimNodeInterval = 0;
341 
342   /// Counter to determine when to reclaim nodes.
343   unsigned ReclaimCounter;
344 
345 public:
346   ExplodedGraph();
347   ~ExplodedGraph();
348 
349   /// Retrieve the node associated with a (Location,State) pair,
350   ///  where the 'Location' is a ProgramPoint in the CFG.  If no node for
351   ///  this pair exists, it is created. IsNew is set to true if
352   ///  the node was freshly created.
353   ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State,
354                         bool IsSink = false,
355                         bool* IsNew = nullptr);
356 
357   /// Create a node for a (Location, State) pair,
358   ///  but don't store it for deduplication later.  This
359   ///  is useful when copying an already completed
360   ///  ExplodedGraph for further processing.
361   ExplodedNode *createUncachedNode(const ProgramPoint &L,
362     ProgramStateRef State,
363     int64_t Id,
364     bool IsSink = false);
365 
366   std::unique_ptr<ExplodedGraph> MakeEmptyGraph() const {
367     return std::make_unique<ExplodedGraph>();
368   }
369 
370   /// addRoot - Add an untyped node to the set of roots.
371   ExplodedNode *addRoot(ExplodedNode *V) {
372     Roots.push_back(V);
373     return V;
374   }
375 
376   /// addEndOfPath - Add an untyped node to the set of EOP nodes.
377   ExplodedNode *addEndOfPath(ExplodedNode *V) {
378     EndNodes.push_back(V);
379     return V;
380   }
381 
382   unsigned num_roots() const { return Roots.size(); }
383   unsigned num_eops() const { return EndNodes.size(); }
384 
385   bool empty() const { return NumNodes == 0; }
386   unsigned size() const { return NumNodes; }
387 
388   void reserve(unsigned NodeCount) { Nodes.reserve(NodeCount); }
389 
390   // Iterators.
391   using NodeTy = ExplodedNode;
392   using AllNodesTy = llvm::FoldingSet<ExplodedNode>;
393   using roots_iterator = NodeVector::iterator;
394   using const_roots_iterator = NodeVector::const_iterator;
395   using eop_iterator = NodeVector::iterator;
396   using const_eop_iterator = NodeVector::const_iterator;
397   using node_iterator = AllNodesTy::iterator;
398   using const_node_iterator = AllNodesTy::const_iterator;
399 
400   node_iterator nodes_begin() { return Nodes.begin(); }
401 
402   node_iterator nodes_end() { return Nodes.end(); }
403 
404   const_node_iterator nodes_begin() const { return Nodes.begin(); }
405 
406   const_node_iterator nodes_end() const { return Nodes.end(); }
407 
408   roots_iterator roots_begin() { return Roots.begin(); }
409 
410   roots_iterator roots_end() { return Roots.end(); }
411 
412   const_roots_iterator roots_begin() const { return Roots.begin(); }
413 
414   const_roots_iterator roots_end() const { return Roots.end(); }
415 
416   eop_iterator eop_begin() { return EndNodes.begin(); }
417 
418   eop_iterator eop_end() { return EndNodes.end(); }
419 
420   const_eop_iterator eop_begin() const { return EndNodes.begin(); }
421 
422   const_eop_iterator eop_end() const { return EndNodes.end(); }
423 
424   llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); }
425   BumpVectorContext &getNodeAllocator() { return BVC; }
426 
427   using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>;
428 
429   /// Creates a trimmed version of the graph that only contains paths leading
430   /// to the given nodes.
431   ///
432   /// \param Nodes The nodes which must appear in the final graph. Presumably
433   ///              these are end-of-path nodes (i.e. they have no successors).
434   /// \param[out] ForwardMap A optional map from nodes in this graph to nodes in
435   ///                        the returned graph.
436   /// \param[out] InverseMap An optional map from nodes in the returned graph to
437   ///                        nodes in this graph.
438   /// \returns The trimmed graph
439   std::unique_ptr<ExplodedGraph>
440   trim(ArrayRef<const NodeTy *> Nodes,
441        InterExplodedGraphMap *ForwardMap = nullptr,
442        InterExplodedGraphMap *InverseMap = nullptr) const;
443 
444   /// Enable tracking of recently allocated nodes for potential reclamation
445   /// when calling reclaimRecentlyAllocatedNodes().
446   void enableNodeReclamation(unsigned Interval) {
447     ReclaimCounter = ReclaimNodeInterval = Interval;
448   }
449 
450   /// Reclaim "uninteresting" nodes created since the last time this method
451   /// was called.
452   void reclaimRecentlyAllocatedNodes();
453 
454   /// Returns true if nodes for the given expression kind are always
455   ///        kept around.
456   static bool isInterestingLValueExpr(const Expr *Ex);
457 
458 private:
459   bool shouldCollect(const ExplodedNode *node);
460   void collectNode(ExplodedNode *node);
461 };
462 
463 class ExplodedNodeSet {
464   using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>;
465   ImplTy Impl;
466 
467 public:
468   ExplodedNodeSet(ExplodedNode *N) {
469     assert(N && !static_cast<ExplodedNode*>(N)->isSink());
470     Impl.insert(N);
471   }
472 
473   ExplodedNodeSet() = default;
474 
475   void Add(ExplodedNode *N) {
476     if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(N);
477   }
478 
479   using iterator = ImplTy::iterator;
480   using const_iterator = ImplTy::const_iterator;
481 
482   unsigned size() const { return Impl.size();  }
483   bool empty()    const { return Impl.empty(); }
484   bool erase(ExplodedNode *N) { return Impl.remove(N); }
485 
486   void clear() { Impl.clear(); }
487 
488   void insert(const ExplodedNodeSet &S) {
489     assert(&S != this);
490     if (empty())
491       Impl = S.Impl;
492     else
493       Impl.insert(S.begin(), S.end());
494   }
495 
496   iterator begin() { return Impl.begin(); }
497   iterator end() { return Impl.end(); }
498 
499   const_iterator begin() const { return Impl.begin(); }
500   const_iterator end() const { return Impl.end(); }
501 };
502 
503 } // namespace ento
504 
505 } // namespace clang
506 
507 // GraphTraits
508 
509 namespace llvm {
510   template <> struct GraphTraits<clang::ento::ExplodedGraph *> {
511     using GraphTy = clang::ento::ExplodedGraph *;
512     using NodeRef = clang::ento::ExplodedNode *;
513     using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator;
514     using nodes_iterator = llvm::df_iterator<GraphTy>;
515 
516     static NodeRef getEntryNode(const GraphTy G) {
517       return *G->roots_begin();
518     }
519 
520     static bool predecessorOfTrivial(NodeRef N) {
521       return N->succ_size() == 1 && N->getFirstSucc()->isTrivial();
522     }
523 
524     static ChildIteratorType child_begin(NodeRef N) {
525       if (predecessorOfTrivial(N))
526         return child_begin(*N->succ_begin());
527       return N->succ_begin();
528     }
529 
530     static ChildIteratorType child_end(NodeRef N) {
531       if (predecessorOfTrivial(N))
532         return child_end(N->getFirstSucc());
533       return N->succ_end();
534     }
535 
536     static nodes_iterator nodes_begin(const GraphTy G) {
537       return df_begin(G);
538     }
539 
540     static nodes_iterator nodes_end(const GraphTy G) {
541       return df_end(G);
542     }
543   };
544 } // namespace llvm
545 
546 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
547