1 //===- CoreEngine.h - Path-Sensitive Dataflow Engine ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a generic engine for intraprocedural, path-sensitive,
10 //  dataflow analysis via graph reachability.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
16 
17 #include "clang/AST/Stmt.h"
18 #include "clang/Analysis/AnalysisDeclContext.h"
19 #include "clang/Analysis/CFG.h"
20 #include "clang/Analysis/ProgramPoint.h"
21 #include "clang/Basic/LLVM.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/Support/Casting.h"
29 #include <cassert>
30 #include <memory>
31 #include <utility>
32 #include <vector>
33 
34 namespace clang {
35 
36 class AnalyzerOptions;
37 class CXXBindTemporaryExpr;
38 class Expr;
39 class LabelDecl;
40 
41 namespace ento {
42 
43 class FunctionSummariesTy;
44 class ExprEngine;
45 
46 //===----------------------------------------------------------------------===//
47 /// CoreEngine - Implements the core logic of the graph-reachability
48 ///   analysis. It traverses the CFG and generates the ExplodedGraph.
49 ///   Program "states" are treated as opaque void pointers.
50 ///   The template class CoreEngine (which subclasses CoreEngine)
51 ///   provides the matching component to the engine that knows the actual types
52 ///   for states.  Note that this engine only dispatches to transfer functions
53 ///   at the statement and block-level.  The analyses themselves must implement
54 ///   any transfer function logic and the sub-expression level (if any).
55 class CoreEngine {
56   friend class CommonNodeBuilder;
57   friend class EndOfFunctionNodeBuilder;
58   friend class ExprEngine;
59   friend class IndirectGotoNodeBuilder;
60   friend class NodeBuilder;
61   friend struct NodeBuilderContext;
62   friend class SwitchNodeBuilder;
63 
64 public:
65   using BlocksExhausted =
66       std::vector<std::pair<BlockEdge, const ExplodedNode *>>;
67 
68   using BlocksAborted =
69       std::vector<std::pair<const CFGBlock *, const ExplodedNode *>>;
70 
71 private:
72   ExprEngine &ExprEng;
73 
74   /// G - The simulation graph.  Each node is a (location,state) pair.
75   mutable ExplodedGraph G;
76 
77   /// WList - A set of queued nodes that need to be processed by the
78   ///  worklist algorithm.  It is up to the implementation of WList to decide
79   ///  the order that nodes are processed.
80   std::unique_ptr<WorkList> WList;
81 
82   /// BCounterFactory - A factory object for created BlockCounter objects.
83   ///   These are used to record for key nodes in the ExplodedGraph the
84   ///   number of times different CFGBlocks have been visited along a path.
85   BlockCounter::Factory BCounterFactory;
86 
87   /// The locations where we stopped doing work because we visited a location
88   ///  too many times.
89   BlocksExhausted blocksExhausted;
90 
91   /// The locations where we stopped because the engine aborted analysis,
92   /// usually because it could not reason about something.
93   BlocksAborted blocksAborted;
94 
95   /// The information about functions shared by the whole translation unit.
96   /// (This data is owned by AnalysisConsumer.)
97   FunctionSummariesTy *FunctionSummaries;
98 
99   /// Add path tags with some useful data along the path when we see that
100   /// something interesting is happening. This field is the allocator for such
101   /// tags.
102   DataTag::Factory DataTags;
103 
104   void generateNode(const ProgramPoint &Loc,
105                     ProgramStateRef State,
106                     ExplodedNode *Pred);
107 
108   void HandleBlockEdge(const BlockEdge &E, ExplodedNode *Pred);
109   void HandleBlockEntrance(const BlockEntrance &E, ExplodedNode *Pred);
110   void HandleBlockExit(const CFGBlock *B, ExplodedNode *Pred);
111 
112   void HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred);
113 
114   void HandlePostStmt(const CFGBlock *B, unsigned StmtIdx, ExplodedNode *Pred);
115 
116   void HandleBranch(const Stmt *Cond, const Stmt *Term, const CFGBlock *B,
117                     ExplodedNode *Pred);
118   void HandleCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
119                                     const CFGBlock *B, ExplodedNode *Pred);
120 
121   /// Handle conditional logic for running static initializers.
122   void HandleStaticInit(const DeclStmt *DS, const CFGBlock *B,
123                         ExplodedNode *Pred);
124 
125   void HandleVirtualBaseBranch(const CFGBlock *B, ExplodedNode *Pred);
126 
127 private:
128   ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
129                                           const ReturnStmt *RS);
130 
131 public:
132   /// Construct a CoreEngine object to analyze the provided CFG.
133   CoreEngine(ExprEngine &exprengine,
134              FunctionSummariesTy *FS,
135              AnalyzerOptions &Opts);
136 
137   CoreEngine(const CoreEngine &) = delete;
138   CoreEngine &operator=(const CoreEngine &) = delete;
139 
140   /// getGraph - Returns the exploded graph.
141   ExplodedGraph &getGraph() { return G; }
142 
143   /// ExecuteWorkList - Run the worklist algorithm for a maximum number of
144   ///  steps.  Returns true if there is still simulation state on the worklist.
145   bool ExecuteWorkList(const LocationContext *L, unsigned Steps,
146                        ProgramStateRef InitState);
147 
148   /// Returns true if there is still simulation state on the worklist.
149   bool ExecuteWorkListWithInitialState(const LocationContext *L,
150                                        unsigned Steps,
151                                        ProgramStateRef InitState,
152                                        ExplodedNodeSet &Dst);
153 
154   /// Dispatch the work list item based on the given location information.
155   /// Use Pred parameter as the predecessor state.
156   void dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc,
157                         const WorkListUnit& WU);
158 
159   // Functions for external checking of whether we have unfinished work
160   bool wasBlockAborted() const { return !blocksAborted.empty(); }
161   bool wasBlocksExhausted() const { return !blocksExhausted.empty(); }
162   bool hasWorkRemaining() const { return wasBlocksExhausted() ||
163                                          WList->hasWork() ||
164                                          wasBlockAborted(); }
165 
166   /// Inform the CoreEngine that a basic block was aborted because
167   /// it could not be completely analyzed.
168   void addAbortedBlock(const ExplodedNode *node, const CFGBlock *block) {
169     blocksAborted.push_back(std::make_pair(block, node));
170   }
171 
172   WorkList *getWorkList() const { return WList.get(); }
173 
174   BlocksExhausted::const_iterator blocks_exhausted_begin() const {
175     return blocksExhausted.begin();
176   }
177 
178   BlocksExhausted::const_iterator blocks_exhausted_end() const {
179     return blocksExhausted.end();
180   }
181 
182   BlocksAborted::const_iterator blocks_aborted_begin() const {
183     return blocksAborted.begin();
184   }
185 
186   BlocksAborted::const_iterator blocks_aborted_end() const {
187     return blocksAborted.end();
188   }
189 
190   /// Enqueue the given set of nodes onto the work list.
191   void enqueue(ExplodedNodeSet &Set);
192 
193   /// Enqueue nodes that were created as a result of processing
194   /// a statement onto the work list.
195   void enqueue(ExplodedNodeSet &Set, const CFGBlock *Block, unsigned Idx);
196 
197   /// enqueue the nodes corresponding to the end of function onto the
198   /// end of path / work list.
199   void enqueueEndOfFunction(ExplodedNodeSet &Set, const ReturnStmt *RS);
200 
201   /// Enqueue a single node created as a result of statement processing.
202   void enqueueStmtNode(ExplodedNode *N, const CFGBlock *Block, unsigned Idx);
203 
204   DataTag::Factory &getDataTags() { return DataTags; }
205 };
206 
207 // TODO: Turn into a class.
208 struct NodeBuilderContext {
209   const CoreEngine &Eng;
210   const CFGBlock *Block;
211   const LocationContext *LC;
212 
213   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
214       : Eng(E), Block(B), LC(N->getLocationContext()) { assert(B); }
215 
216   /// Return the CFGBlock associated with this builder.
217   const CFGBlock *getBlock() const { return Block; }
218 
219   /// Returns the number of times the current basic block has been
220   /// visited on the exploded graph path.
221   unsigned blockCount() const {
222     return Eng.WList->getBlockCounter().getNumVisited(
223                     LC->getStackFrame(),
224                     Block->getBlockID());
225   }
226 };
227 
228 /// \class NodeBuilder
229 /// This is the simplest builder which generates nodes in the
230 /// ExplodedGraph.
231 ///
232 /// The main benefit of the builder is that it automatically tracks the
233 /// frontier nodes (or destination set). This is the set of nodes which should
234 /// be propagated to the next step / builder. They are the nodes which have been
235 /// added to the builder (either as the input node set or as the newly
236 /// constructed nodes) but did not have any outgoing transitions added.
237 class NodeBuilder {
238   virtual void anchor();
239 
240 protected:
241   const NodeBuilderContext &C;
242 
243   /// Specifies if the builder results have been finalized. For example, if it
244   /// is set to false, autotransitions are yet to be generated.
245   bool Finalized;
246 
247   bool HasGeneratedNodes = false;
248 
249   /// The frontier set - a set of nodes which need to be propagated after
250   /// the builder dies.
251   ExplodedNodeSet &Frontier;
252 
253   /// Checks if the results are ready.
254   virtual bool checkResults() {
255     return Finalized;
256   }
257 
258   bool hasNoSinksInFrontier() {
259     for (const auto  I : Frontier)
260       if (I->isSink())
261         return false;
262     return true;
263   }
264 
265   /// Allow subclasses to finalize results before result_begin() is executed.
266   virtual void finalizeResults() {}
267 
268   ExplodedNode *generateNodeImpl(const ProgramPoint &PP,
269                                  ProgramStateRef State,
270                                  ExplodedNode *Pred,
271                                  bool MarkAsSink = false);
272 
273 public:
274   NodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
275               const NodeBuilderContext &Ctx, bool F = true)
276       : C(Ctx), Finalized(F), Frontier(DstSet) {
277     Frontier.Add(SrcNode);
278   }
279 
280   NodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
281               const NodeBuilderContext &Ctx, bool F = true)
282       : C(Ctx), Finalized(F), Frontier(DstSet) {
283     Frontier.insert(SrcSet);
284     assert(hasNoSinksInFrontier());
285   }
286 
287   virtual ~NodeBuilder() = default;
288 
289   /// Generates a node in the ExplodedGraph.
290   ExplodedNode *generateNode(const ProgramPoint &PP,
291                              ProgramStateRef State,
292                              ExplodedNode *Pred) {
293     return generateNodeImpl(PP, State, Pred, false);
294   }
295 
296   /// Generates a sink in the ExplodedGraph.
297   ///
298   /// When a node is marked as sink, the exploration from the node is stopped -
299   /// the node becomes the last node on the path and certain kinds of bugs are
300   /// suppressed.
301   ExplodedNode *generateSink(const ProgramPoint &PP,
302                              ProgramStateRef State,
303                              ExplodedNode *Pred) {
304     return generateNodeImpl(PP, State, Pred, true);
305   }
306 
307   const ExplodedNodeSet &getResults() {
308     finalizeResults();
309     assert(checkResults());
310     return Frontier;
311   }
312 
313   using iterator = ExplodedNodeSet::iterator;
314 
315   /// Iterators through the results frontier.
316   iterator begin() {
317     finalizeResults();
318     assert(checkResults());
319     return Frontier.begin();
320   }
321 
322   iterator end() {
323     finalizeResults();
324     return Frontier.end();
325   }
326 
327   const NodeBuilderContext &getContext() { return C; }
328   bool hasGeneratedNodes() { return HasGeneratedNodes; }
329 
330   void takeNodes(const ExplodedNodeSet &S) {
331     for (const auto I : S)
332       Frontier.erase(I);
333   }
334 
335   void takeNodes(ExplodedNode *N) { Frontier.erase(N); }
336   void addNodes(const ExplodedNodeSet &S) { Frontier.insert(S); }
337   void addNodes(ExplodedNode *N) { Frontier.Add(N); }
338 };
339 
340 /// \class NodeBuilderWithSinks
341 /// This node builder keeps track of the generated sink nodes.
342 class NodeBuilderWithSinks: public NodeBuilder {
343   void anchor() override;
344 
345 protected:
346   SmallVector<ExplodedNode*, 2> sinksGenerated;
347   ProgramPoint &Location;
348 
349 public:
350   NodeBuilderWithSinks(ExplodedNode *Pred, ExplodedNodeSet &DstSet,
351                        const NodeBuilderContext &Ctx, ProgramPoint &L)
352       : NodeBuilder(Pred, DstSet, Ctx), Location(L) {}
353 
354   ExplodedNode *generateNode(ProgramStateRef State,
355                              ExplodedNode *Pred,
356                              const ProgramPointTag *Tag = nullptr) {
357     const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
358     return NodeBuilder::generateNode(LocalLoc, State, Pred);
359   }
360 
361   ExplodedNode *generateSink(ProgramStateRef State, ExplodedNode *Pred,
362                              const ProgramPointTag *Tag = nullptr) {
363     const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
364     ExplodedNode *N = NodeBuilder::generateSink(LocalLoc, State, Pred);
365     if (N && N->isSink())
366       sinksGenerated.push_back(N);
367     return N;
368   }
369 
370   const SmallVectorImpl<ExplodedNode*> &getSinks() const {
371     return sinksGenerated;
372   }
373 };
374 
375 /// \class StmtNodeBuilder
376 /// This builder class is useful for generating nodes that resulted from
377 /// visiting a statement. The main difference from its parent NodeBuilder is
378 /// that it creates a statement specific ProgramPoint.
379 class StmtNodeBuilder: public NodeBuilder {
380   NodeBuilder *EnclosingBldr;
381 
382 public:
383   /// Constructs a StmtNodeBuilder. If the builder is going to process
384   /// nodes currently owned by another builder(with larger scope), use
385   /// Enclosing builder to transfer ownership.
386   StmtNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
387                   const NodeBuilderContext &Ctx,
388                   NodeBuilder *Enclosing = nullptr)
389       : NodeBuilder(SrcNode, DstSet, Ctx), EnclosingBldr(Enclosing) {
390     if (EnclosingBldr)
391       EnclosingBldr->takeNodes(SrcNode);
392   }
393 
394   StmtNodeBuilder(ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
395                   const NodeBuilderContext &Ctx,
396                   NodeBuilder *Enclosing = nullptr)
397       : NodeBuilder(SrcSet, DstSet, Ctx), EnclosingBldr(Enclosing) {
398     if (EnclosingBldr)
399       for (const auto I : SrcSet)
400         EnclosingBldr->takeNodes(I);
401   }
402 
403   ~StmtNodeBuilder() override;
404 
405   using NodeBuilder::generateNode;
406   using NodeBuilder::generateSink;
407 
408   ExplodedNode *generateNode(const Stmt *S,
409                              ExplodedNode *Pred,
410                              ProgramStateRef St,
411                              const ProgramPointTag *tag = nullptr,
412                              ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
413     const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
414                                   Pred->getLocationContext(), tag);
415     return NodeBuilder::generateNode(L, St, Pred);
416   }
417 
418   ExplodedNode *generateSink(const Stmt *S,
419                              ExplodedNode *Pred,
420                              ProgramStateRef St,
421                              const ProgramPointTag *tag = nullptr,
422                              ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
423     const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
424                                   Pred->getLocationContext(), tag);
425     return NodeBuilder::generateSink(L, St, Pred);
426   }
427 };
428 
429 /// BranchNodeBuilder is responsible for constructing the nodes
430 /// corresponding to the two branches of the if statement - true and false.
431 class BranchNodeBuilder: public NodeBuilder {
432   const CFGBlock *DstT;
433   const CFGBlock *DstF;
434 
435   bool InFeasibleTrue;
436   bool InFeasibleFalse;
437 
438   void anchor() override;
439 
440 public:
441   BranchNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
442                     const NodeBuilderContext &C,
443                     const CFGBlock *dstT, const CFGBlock *dstF)
444       : NodeBuilder(SrcNode, DstSet, C), DstT(dstT), DstF(dstF),
445         InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
446     // The branch node builder does not generate autotransitions.
447     // If there are no successors it means that both branches are infeasible.
448     takeNodes(SrcNode);
449   }
450 
451   BranchNodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
452                     const NodeBuilderContext &C,
453                     const CFGBlock *dstT, const CFGBlock *dstF)
454       : NodeBuilder(SrcSet, DstSet, C), DstT(dstT), DstF(dstF),
455         InFeasibleTrue(!DstT), InFeasibleFalse(!DstF) {
456     takeNodes(SrcSet);
457   }
458 
459   ExplodedNode *generateNode(ProgramStateRef State, bool branch,
460                              ExplodedNode *Pred);
461 
462   const CFGBlock *getTargetBlock(bool branch) const {
463     return branch ? DstT : DstF;
464   }
465 
466   void markInfeasible(bool branch) {
467     if (branch)
468       InFeasibleTrue = true;
469     else
470       InFeasibleFalse = true;
471   }
472 
473   bool isFeasible(bool branch) {
474     return branch ? !InFeasibleTrue : !InFeasibleFalse;
475   }
476 };
477 
478 class IndirectGotoNodeBuilder {
479   CoreEngine& Eng;
480   const CFGBlock *Src;
481   const CFGBlock &DispatchBlock;
482   const Expr *E;
483   ExplodedNode *Pred;
484 
485 public:
486   IndirectGotoNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
487                     const Expr *e, const CFGBlock *dispatch, CoreEngine* eng)
488       : Eng(*eng), Src(src), DispatchBlock(*dispatch), E(e), Pred(pred) {}
489 
490   class iterator {
491     friend class IndirectGotoNodeBuilder;
492 
493     CFGBlock::const_succ_iterator I;
494 
495     iterator(CFGBlock::const_succ_iterator i) : I(i) {}
496 
497   public:
498     iterator &operator++() { ++I; return *this; }
499     bool operator!=(const iterator &X) const { return I != X.I; }
500 
501     const LabelDecl *getLabel() const {
502       return cast<LabelStmt>((*I)->getLabel())->getDecl();
503     }
504 
505     const CFGBlock *getBlock() const {
506       return *I;
507     }
508   };
509 
510   iterator begin() { return iterator(DispatchBlock.succ_begin()); }
511   iterator end() { return iterator(DispatchBlock.succ_end()); }
512 
513   ExplodedNode *generateNode(const iterator &I,
514                              ProgramStateRef State,
515                              bool isSink = false);
516 
517   const Expr *getTarget() const { return E; }
518 
519   ProgramStateRef getState() const { return Pred->State; }
520 
521   const LocationContext *getLocationContext() const {
522     return Pred->getLocationContext();
523   }
524 };
525 
526 class SwitchNodeBuilder {
527   CoreEngine& Eng;
528   const CFGBlock *Src;
529   const Expr *Condition;
530   ExplodedNode *Pred;
531 
532 public:
533   SwitchNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
534                     const Expr *condition, CoreEngine* eng)
535       : Eng(*eng), Src(src), Condition(condition), Pred(pred) {}
536 
537   class iterator {
538     friend class SwitchNodeBuilder;
539 
540     CFGBlock::const_succ_reverse_iterator I;
541 
542     iterator(CFGBlock::const_succ_reverse_iterator i) : I(i) {}
543 
544   public:
545     iterator &operator++() { ++I; return *this; }
546     bool operator!=(const iterator &X) const { return I != X.I; }
547     bool operator==(const iterator &X) const { return I == X.I; }
548 
549     const CaseStmt *getCase() const {
550       return cast<CaseStmt>((*I)->getLabel());
551     }
552 
553     const CFGBlock *getBlock() const {
554       return *I;
555     }
556   };
557 
558   iterator begin() { return iterator(Src->succ_rbegin()+1); }
559   iterator end() { return iterator(Src->succ_rend()); }
560 
561   const SwitchStmt *getSwitch() const {
562     return cast<SwitchStmt>(Src->getTerminator());
563   }
564 
565   ExplodedNode *generateCaseStmtNode(const iterator &I,
566                                      ProgramStateRef State);
567 
568   ExplodedNode *generateDefaultCaseNode(ProgramStateRef State,
569                                         bool isSink = false);
570 
571   const Expr *getCondition() const { return Condition; }
572 
573   ProgramStateRef getState() const { return Pred->State; }
574 
575   const LocationContext *getLocationContext() const {
576     return Pred->getLocationContext();
577   }
578 };
579 
580 } // namespace ento
581 
582 } // namespace clang
583 
584 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
585