1 //===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the template classes ExplodedNode and ExplodedGraph,
10 //  which represent a path-sensitive, intra-procedural "exploded graph."
11 //  See "Precise interprocedural dataflow analysis via graph reachability"
12 //  by Reps, Horwitz, and Sagiv
13 //  (http://portal.acm.org/citation.cfm?id=199462) for the definition of an
14 //  exploded graph.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
19 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
20 
21 #include "clang/Analysis/AnalysisDeclContext.h"
22 #include "clang/Analysis/ProgramPoint.h"
23 #include "clang/Analysis/Support/BumpVector.h"
24 #include "clang/Basic/LLVM.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DepthFirstIterator.h"
31 #include "llvm/ADT/FoldingSet.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/Compiler.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <memory>
41 #include <utility>
42 #include <vector>
43 
44 namespace clang {
45 
46 class CFG;
47 class Decl;
48 class Expr;
49 class ParentMap;
50 class Stmt;
51 
52 namespace ento {
53 
54 class ExplodedGraph;
55 
56 //===----------------------------------------------------------------------===//
57 // ExplodedGraph "implementation" classes.  These classes are not typed to
58 // contain a specific kind of state.  Typed-specialized versions are defined
59 // on top of these classes.
60 //===----------------------------------------------------------------------===//
61 
62 // ExplodedNode is not constified all over the engine because we need to add
63 // successors to it at any time after creating it.
64 
65 class ExplodedNode : public llvm::FoldingSetNode {
66   friend class BranchNodeBuilder;
67   friend class CoreEngine;
68   friend class EndOfFunctionNodeBuilder;
69   friend class ExplodedGraph;
70   friend class IndirectGotoNodeBuilder;
71   friend class NodeBuilder;
72   friend class SwitchNodeBuilder;
73 
74   /// Efficiently stores a list of ExplodedNodes, or an optional flag.
75   ///
76   /// NodeGroup provides opaque storage for a list of ExplodedNodes, optimizing
77   /// for the case when there is only one node in the group. This is a fairly
78   /// common case in an ExplodedGraph, where most nodes have only one
79   /// predecessor and many have only one successor. It can also be used to
80   /// store a flag rather than a node list, which ExplodedNode uses to mark
81   /// whether a node is a sink. If the flag is set, the group is implicitly
82   /// empty and no nodes may be added.
83   class NodeGroup {
84     // Conceptually a discriminated union. If the low bit is set, the node is
85     // a sink. If the low bit is not set, the pointer refers to the storage
86     // for the nodes in the group.
87     // This is not a PointerIntPair in order to keep the storage type opaque.
88     uintptr_t P;
89 
90   public:
91     NodeGroup(bool Flag = false) : P(Flag) {
92       assert(getFlag() == Flag);
93     }
94 
95     ExplodedNode * const *begin() const;
96 
97     ExplodedNode * const *end() const;
98 
99     unsigned size() const;
100 
101     bool empty() const { return P == 0 || getFlag() != 0; }
102 
103     /// Adds a node to the list.
104     ///
105     /// The group must not have been created with its flag set.
106     void addNode(ExplodedNode *N, ExplodedGraph &G);
107 
108     /// Replaces the single node in this group with a new node.
109     ///
110     /// Note that this should only be used when you know the group was not
111     /// created with its flag set, and that the group is empty or contains
112     /// only a single node.
113     void replaceNode(ExplodedNode *node);
114 
115     /// Returns whether this group was created with its flag set.
116     bool getFlag() const {
117       return (P & 1);
118     }
119   };
120 
121   /// Location - The program location (within a function body) associated
122   ///  with this node.
123   const ProgramPoint Location;
124 
125   /// State - The state associated with this node.
126   ProgramStateRef State;
127 
128   /// Preds - The predecessors of this node.
129   NodeGroup Preds;
130 
131   /// Succs - The successors of this node.
132   NodeGroup Succs;
133 
134   int64_t Id;
135 
136 public:
137   explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state,
138                         int64_t Id, bool IsSink)
139       : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) {
140     assert(isSink() == IsSink);
141   }
142 
143   /// getLocation - Returns the edge associated with the given node.
144   ProgramPoint getLocation() const { return Location; }
145 
146   const LocationContext *getLocationContext() const {
147     return getLocation().getLocationContext();
148   }
149 
150   const StackFrameContext *getStackFrame() const {
151     return getLocation().getStackFrame();
152   }
153 
154   const Decl &getCodeDecl() const { return *getLocationContext()->getDecl(); }
155 
156   CFG &getCFG() const { return *getLocationContext()->getCFG(); }
157 
158   const CFGBlock *getCFGBlock() const;
159 
160   const ParentMap &getParentMap() const {
161     return getLocationContext()->getParentMap();
162   }
163 
164   template <typename T> T &getAnalysis() const {
165     return *getLocationContext()->getAnalysis<T>();
166   }
167 
168   const ProgramStateRef &getState() const { return State; }
169 
170   template <typename T> Optional<T> getLocationAs() const & {
171     return Location.getAs<T>();
172   }
173 
174   /// Get the value of an arbitrary expression at this node.
175   SVal getSVal(const Stmt *S) const {
176     return getState()->getSVal(S, getLocationContext());
177   }
178 
179   static void Profile(llvm::FoldingSetNodeID &ID,
180                       const ProgramPoint &Loc,
181                       const ProgramStateRef &state,
182                       bool IsSink) {
183     ID.Add(Loc);
184     ID.AddPointer(state.get());
185     ID.AddBoolean(IsSink);
186   }
187 
188   void Profile(llvm::FoldingSetNodeID& ID) const {
189     // We avoid copy constructors by not using accessors.
190     Profile(ID, Location, State, isSink());
191   }
192 
193   /// addPredeccessor - Adds a predecessor to the current node, and
194   ///  in tandem add this node as a successor of the other node.
195   void addPredecessor(ExplodedNode *V, ExplodedGraph &G);
196 
197   unsigned succ_size() const { return Succs.size(); }
198   unsigned pred_size() const { return Preds.size(); }
199   bool succ_empty() const { return Succs.empty(); }
200   bool pred_empty() const { return Preds.empty(); }
201 
202   bool isSink() const { return Succs.getFlag(); }
203 
204   bool hasSinglePred() const {
205     return (pred_size() == 1);
206   }
207 
208   ExplodedNode *getFirstPred() {
209     return pred_empty() ? nullptr : *(pred_begin());
210   }
211 
212   const ExplodedNode *getFirstPred() const {
213     return const_cast<ExplodedNode*>(this)->getFirstPred();
214   }
215 
216   ExplodedNode *getFirstSucc() {
217     return succ_empty() ? nullptr : *(succ_begin());
218   }
219 
220   const ExplodedNode *getFirstSucc() const {
221     return const_cast<ExplodedNode*>(this)->getFirstSucc();
222   }
223 
224   // Iterators over successor and predecessor vertices.
225   using succ_iterator = ExplodedNode * const *;
226   using succ_range = llvm::iterator_range<succ_iterator>;
227 
228   using const_succ_iterator = const ExplodedNode * const *;
229   using const_succ_range = llvm::iterator_range<const_succ_iterator>;
230 
231   using pred_iterator = ExplodedNode * const *;
232   using pred_range = llvm::iterator_range<pred_iterator>;
233 
234   using const_pred_iterator = const ExplodedNode * const *;
235   using const_pred_range = llvm::iterator_range<const_pred_iterator>;
236 
237   pred_iterator pred_begin() { return Preds.begin(); }
238   pred_iterator pred_end() { return Preds.end(); }
239   pred_range preds() { return {Preds.begin(), Preds.end()}; }
240 
241   const_pred_iterator pred_begin() const {
242     return const_cast<ExplodedNode*>(this)->pred_begin();
243   }
244   const_pred_iterator pred_end() const {
245     return const_cast<ExplodedNode*>(this)->pred_end();
246   }
247   const_pred_range preds() const { return {Preds.begin(), Preds.end()}; }
248 
249   succ_iterator succ_begin() { return Succs.begin(); }
250   succ_iterator succ_end() { return Succs.end(); }
251   succ_range succs() { return {Succs.begin(), Succs.end()}; }
252 
253   const_succ_iterator succ_begin() const {
254     return const_cast<ExplodedNode*>(this)->succ_begin();
255   }
256   const_succ_iterator succ_end() const {
257     return const_cast<ExplodedNode*>(this)->succ_end();
258   }
259   const_succ_range succs() const { return {Succs.begin(), Succs.end()}; }
260 
261   int64_t getID() const { return Id; }
262 
263   /// The node is trivial if it has only one successor, only one predecessor,
264   /// it's predecessor has only one successor,
265   /// and its program state is the same as the program state of the previous
266   /// node.
267   /// Trivial nodes may be skipped while printing exploded graph.
268   bool isTrivial() const;
269 
270   /// If the node's program point corresponds to a statement, retrieve that
271   /// statement. Useful for figuring out where to put a warning or a note.
272   /// If the statement belongs to a body-farmed definition,
273   /// retrieve the call site for that definition.
274   const Stmt *getStmtForDiagnostics() const;
275 
276   /// Find the next statement that was executed on this node's execution path.
277   /// Useful for explaining control flow that follows the current node.
278   /// If the statement belongs to a body-farmed definition, retrieve the
279   /// call site for that definition.
280   const Stmt *getNextStmtForDiagnostics() const;
281 
282   /// Find the statement that was executed immediately before this node.
283   /// Useful when the node corresponds to a CFG block entrance.
284   /// If the statement belongs to a body-farmed definition, retrieve the
285   /// call site for that definition.
286   const Stmt *getPreviousStmtForDiagnostics() const;
287 
288   /// Find the statement that was executed at or immediately before this node.
289   /// Useful when any nearby statement will do.
290   /// If the statement belongs to a body-farmed definition, retrieve the
291   /// call site for that definition.
292   const Stmt *getCurrentOrPreviousStmtForDiagnostics() const;
293 
294 private:
295   void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); }
296   void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); }
297 };
298 
299 using InterExplodedGraphMap =
300     llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>;
301 
302 class ExplodedGraph {
303 protected:
304   friend class CoreEngine;
305 
306   // Type definitions.
307   using NodeVector = std::vector<ExplodedNode *>;
308 
309   /// The roots of the simulation graph. Usually there will be only
310   /// one, but clients are free to establish multiple subgraphs within a single
311   /// SimulGraph. Moreover, these subgraphs can often merge when paths from
312   /// different roots reach the same state at the same program location.
313   NodeVector Roots;
314 
315   /// The nodes in the simulation graph which have been
316   /// specially marked as the endpoint of an abstract simulation path.
317   NodeVector EndNodes;
318 
319   /// Nodes - The nodes in the graph.
320   llvm::FoldingSet<ExplodedNode> Nodes;
321 
322   /// BVC - Allocator and context for allocating nodes and their predecessor
323   /// and successor groups.
324   BumpVectorContext BVC;
325 
326   /// NumNodes - The number of nodes in the graph.
327   int64_t NumNodes = 0;
328 
329   /// A list of recently allocated nodes that can potentially be recycled.
330   NodeVector ChangedNodes;
331 
332   /// A list of nodes that can be reused.
333   NodeVector FreeNodes;
334 
335   /// Determines how often nodes are reclaimed.
336   ///
337   /// If this is 0, nodes will never be reclaimed.
338   unsigned ReclaimNodeInterval = 0;
339 
340   /// Counter to determine when to reclaim nodes.
341   unsigned ReclaimCounter;
342 
343 public:
344   ExplodedGraph();
345   ~ExplodedGraph();
346 
347   /// Retrieve the node associated with a (Location,State) pair,
348   ///  where the 'Location' is a ProgramPoint in the CFG.  If no node for
349   ///  this pair exists, it is created. IsNew is set to true if
350   ///  the node was freshly created.
351   ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State,
352                         bool IsSink = false,
353                         bool* IsNew = nullptr);
354 
355   /// Create a node for a (Location, State) pair,
356   ///  but don't store it for deduplication later.  This
357   ///  is useful when copying an already completed
358   ///  ExplodedGraph for further processing.
359   ExplodedNode *createUncachedNode(const ProgramPoint &L,
360     ProgramStateRef State,
361     int64_t Id,
362     bool IsSink = false);
363 
364   std::unique_ptr<ExplodedGraph> MakeEmptyGraph() const {
365     return std::make_unique<ExplodedGraph>();
366   }
367 
368   /// addRoot - Add an untyped node to the set of roots.
369   ExplodedNode *addRoot(ExplodedNode *V) {
370     Roots.push_back(V);
371     return V;
372   }
373 
374   /// addEndOfPath - Add an untyped node to the set of EOP nodes.
375   ExplodedNode *addEndOfPath(ExplodedNode *V) {
376     EndNodes.push_back(V);
377     return V;
378   }
379 
380   unsigned num_roots() const { return Roots.size(); }
381   unsigned num_eops() const { return EndNodes.size(); }
382 
383   bool empty() const { return NumNodes == 0; }
384   unsigned size() const { return NumNodes; }
385 
386   void reserve(unsigned NodeCount) { Nodes.reserve(NodeCount); }
387 
388   // Iterators.
389   using NodeTy = ExplodedNode;
390   using AllNodesTy = llvm::FoldingSet<ExplodedNode>;
391   using roots_iterator = NodeVector::iterator;
392   using const_roots_iterator = NodeVector::const_iterator;
393   using eop_iterator = NodeVector::iterator;
394   using const_eop_iterator = NodeVector::const_iterator;
395   using node_iterator = AllNodesTy::iterator;
396   using const_node_iterator = AllNodesTy::const_iterator;
397 
398   node_iterator nodes_begin() { return Nodes.begin(); }
399 
400   node_iterator nodes_end() { return Nodes.end(); }
401 
402   const_node_iterator nodes_begin() const { return Nodes.begin(); }
403 
404   const_node_iterator nodes_end() const { return Nodes.end(); }
405 
406   roots_iterator roots_begin() { return Roots.begin(); }
407 
408   roots_iterator roots_end() { return Roots.end(); }
409 
410   const_roots_iterator roots_begin() const { return Roots.begin(); }
411 
412   const_roots_iterator roots_end() const { return Roots.end(); }
413 
414   eop_iterator eop_begin() { return EndNodes.begin(); }
415 
416   eop_iterator eop_end() { return EndNodes.end(); }
417 
418   const_eop_iterator eop_begin() const { return EndNodes.begin(); }
419 
420   const_eop_iterator eop_end() const { return EndNodes.end(); }
421 
422   llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); }
423   BumpVectorContext &getNodeAllocator() { return BVC; }
424 
425   using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>;
426 
427   /// Creates a trimmed version of the graph that only contains paths leading
428   /// to the given nodes.
429   ///
430   /// \param Nodes The nodes which must appear in the final graph. Presumably
431   ///              these are end-of-path nodes (i.e. they have no successors).
432   /// \param[out] ForwardMap A optional map from nodes in this graph to nodes in
433   ///                        the returned graph.
434   /// \param[out] InverseMap An optional map from nodes in the returned graph to
435   ///                        nodes in this graph.
436   /// \returns The trimmed graph
437   std::unique_ptr<ExplodedGraph>
438   trim(ArrayRef<const NodeTy *> Nodes,
439        InterExplodedGraphMap *ForwardMap = nullptr,
440        InterExplodedGraphMap *InverseMap = nullptr) const;
441 
442   /// Enable tracking of recently allocated nodes for potential reclamation
443   /// when calling reclaimRecentlyAllocatedNodes().
444   void enableNodeReclamation(unsigned Interval) {
445     ReclaimCounter = ReclaimNodeInterval = Interval;
446   }
447 
448   /// Reclaim "uninteresting" nodes created since the last time this method
449   /// was called.
450   void reclaimRecentlyAllocatedNodes();
451 
452   /// Returns true if nodes for the given expression kind are always
453   ///        kept around.
454   static bool isInterestingLValueExpr(const Expr *Ex);
455 
456 private:
457   bool shouldCollect(const ExplodedNode *node);
458   void collectNode(ExplodedNode *node);
459 };
460 
461 class ExplodedNodeSet {
462   using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>;
463   ImplTy Impl;
464 
465 public:
466   ExplodedNodeSet(ExplodedNode *N) {
467     assert(N && !static_cast<ExplodedNode*>(N)->isSink());
468     Impl.insert(N);
469   }
470 
471   ExplodedNodeSet() = default;
472 
473   void Add(ExplodedNode *N) {
474     if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(N);
475   }
476 
477   using iterator = ImplTy::iterator;
478   using const_iterator = ImplTy::const_iterator;
479 
480   unsigned size() const { return Impl.size();  }
481   bool empty()    const { return Impl.empty(); }
482   bool erase(ExplodedNode *N) { return Impl.remove(N); }
483 
484   void clear() { Impl.clear(); }
485 
486   void insert(const ExplodedNodeSet &S) {
487     assert(&S != this);
488     if (empty())
489       Impl = S.Impl;
490     else
491       Impl.insert(S.begin(), S.end());
492   }
493 
494   iterator begin() { return Impl.begin(); }
495   iterator end() { return Impl.end(); }
496 
497   const_iterator begin() const { return Impl.begin(); }
498   const_iterator end() const { return Impl.end(); }
499 };
500 
501 } // namespace ento
502 
503 } // namespace clang
504 
505 // GraphTraits
506 
507 namespace llvm {
508   template <> struct GraphTraits<clang::ento::ExplodedGraph *> {
509     using GraphTy = clang::ento::ExplodedGraph *;
510     using NodeRef = clang::ento::ExplodedNode *;
511     using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator;
512     using nodes_iterator = llvm::df_iterator<GraphTy>;
513 
514     static NodeRef getEntryNode(const GraphTy G) {
515       return *G->roots_begin();
516     }
517 
518     static bool predecessorOfTrivial(NodeRef N) {
519       return N->succ_size() == 1 && N->getFirstSucc()->isTrivial();
520     }
521 
522     static ChildIteratorType child_begin(NodeRef N) {
523       if (predecessorOfTrivial(N))
524         return child_begin(*N->succ_begin());
525       return N->succ_begin();
526     }
527 
528     static ChildIteratorType child_end(NodeRef N) {
529       if (predecessorOfTrivial(N))
530         return child_end(N->getFirstSucc());
531       return N->succ_end();
532     }
533 
534     static nodes_iterator nodes_begin(const GraphTy G) {
535       return df_begin(G);
536     }
537 
538     static nodes_iterator nodes_end(const GraphTy G) {
539       return df_end(G);
540     }
541   };
542 } // namespace llvm
543 
544 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H
545