1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17 
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/Expr.h"
20 #include "clang/AST/TypeOrdering.h"
21 #include "clang/Analysis/FlowSensitive/Arena.h"
22 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
23 #include "clang/Analysis/FlowSensitive/Solver.h"
24 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
25 #include "clang/Analysis/FlowSensitive/Value.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/SetVector.h"
29 #include "llvm/Support/Compiler.h"
30 #include <cassert>
31 #include <memory>
32 #include <optional>
33 #include <type_traits>
34 #include <utility>
35 #include <vector>
36 
37 namespace clang {
38 namespace dataflow {
39 class Logger;
40 
41 /// Skip past nodes that the CFG does not emit. These nodes are invisible to
42 /// flow-sensitive analysis, and should be ignored as they will effectively not
43 /// exist.
44 ///
45 ///   * `ParenExpr` - The CFG takes the operator precedence into account, but
46 ///   otherwise omits the node afterwards.
47 ///
48 ///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
49 ///   destructors and then omit the node.
50 ///
51 const Expr &ignoreCFGOmittedNodes(const Expr &E);
52 const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
53 
54 /// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic
55 /// iteration order.
56 using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>;
57 
58 /// Returns the set of all fields in the type.
59 FieldSet getObjectFields(QualType Type);
60 
61 struct ContextSensitiveOptions {
62   /// The maximum depth to analyze. A value of zero is equivalent to disabling
63   /// context-sensitive analysis entirely.
64   unsigned Depth = 2;
65 };
66 
67 /// Owns objects that encompass the state of a program and stores context that
68 /// is used during dataflow analysis.
69 class DataflowAnalysisContext {
70 public:
71   struct Options {
72     /// Options for analyzing function bodies when present in the translation
73     /// unit, or empty to disable context-sensitive analysis. Note that this is
74     /// fundamentally limited: some constructs, such as recursion, are
75     /// explicitly unsupported.
76     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
77 
78     /// If provided, analysis details will be recorded here.
79     /// (This is always non-null within an AnalysisContext, the framework
80     /// provides a fallback no-op logger).
81     Logger *Log = nullptr;
82   };
83 
84   /// Constructs a dataflow analysis context.
85   ///
86   /// Requirements:
87   ///
88   ///  `S` must not be null.
89   DataflowAnalysisContext(std::unique_ptr<Solver> S,
90                           Options Opts = Options{
91                               /*ContextSensitiveOpts=*/std::nullopt,
92                               /*Logger=*/nullptr});
93   ~DataflowAnalysisContext();
94 
95   /// Returns a new storage location appropriate for `Type`.
96   ///
97   /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
98   StorageLocation &createStorageLocation(QualType Type);
99 
100   /// Returns a stable storage location for `D`.
101   StorageLocation &getStableStorageLocation(const VarDecl &D);
102 
103   /// Returns a stable storage location for `E`.
104   StorageLocation &getStableStorageLocation(const Expr &E);
105 
106   /// Assigns `Loc` as the storage location of `D`.
107   ///
108   /// Requirements:
109   ///
110   ///  `D` must not be assigned a storage location.
111   void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
112     assert(!DeclToLoc.contains(&D));
113     DeclToLoc[&D] = &Loc;
114   }
115 
116   /// Returns the storage location assigned to `D` or null if `D` has no
117   /// assigned storage location.
118   StorageLocation *getStorageLocation(const ValueDecl &D) const {
119     return DeclToLoc.lookup(&D);
120   }
121 
122   /// Assigns `Loc` as the storage location of `E`.
123   ///
124   /// Requirements:
125   ///
126   ///  `E` must not be assigned a storage location.
127   void setStorageLocation(const Expr &E, StorageLocation &Loc) {
128     const Expr &CanonE = ignoreCFGOmittedNodes(E);
129     assert(!ExprToLoc.contains(&CanonE));
130     ExprToLoc[&CanonE] = &Loc;
131   }
132 
133   /// Returns the storage location assigned to `E` or null if `E` has no
134   /// assigned storage location.
135   StorageLocation *getStorageLocation(const Expr &E) const {
136     return ExprToLoc.lookup(&ignoreCFGOmittedNodes(E));
137   }
138 
139   /// Returns a pointer value that represents a null pointer. Calls with
140   /// `PointeeType` that are canonically equivalent will return the same result.
141   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
142   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
143 
144   /// Adds `Constraint` to the flow condition identified by `Token`.
145   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
146 
147   /// Creates a new flow condition with the same constraints as the flow
148   /// condition identified by `Token` and returns its token.
149   Atom forkFlowCondition(Atom Token);
150 
151   /// Creates a new flow condition that represents the disjunction of the flow
152   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
153   /// token.
154   Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
155 
156   /// Returns true if and only if the constraints of the flow condition
157   /// identified by `Token` imply that `Val` is true.
158   bool flowConditionImplies(Atom Token, const Formula &);
159 
160   /// Returns true if and only if the constraints of the flow condition
161   /// identified by `Token` are always true.
162   bool flowConditionIsTautology(Atom Token);
163 
164   /// Returns true if `Val1` is equivalent to `Val2`.
165   /// Note: This function doesn't take into account constraints on `Val1` and
166   /// `Val2` imposed by the flow condition.
167   bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
168 
169   LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
170                                           llvm::raw_ostream &OS = llvm::dbgs());
171 
172   /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise,
173   /// returns null.
174   const ControlFlowContext *getControlFlowContext(const FunctionDecl *F);
175 
176   const Options &getOptions() { return Opts; }
177 
178   Arena &arena() { return *A; }
179 
180   /// Returns the outcome of satisfiability checking on `Constraints`.
181   ///
182   /// Flow conditions are not incorporated, so they may need to be manually
183   /// included in `Constraints` to provide contextually-accurate results, e.g.
184   /// if any definitions or relationships of the values in `Constraints` have
185   /// been stored in flow conditions.
186   Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
187 
188   /// Returns the fields of `Type`, limited to the set of fields modeled by this
189   /// context.
190   FieldSet getModeledFields(QualType Type);
191 
192 private:
193   friend class Environment;
194 
195   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
196     static QualType getEmptyKey() {
197       // Allow a NULL `QualType` by using a different value as the empty key.
198       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
199     }
200 
201     using DenseMapInfo::getHashValue;
202     using DenseMapInfo::getTombstoneKey;
203     using DenseMapInfo::isEqual;
204   };
205 
206   // Extends the set of modeled field declarations.
207   void addModeledFields(const FieldSet &Fields);
208 
209   /// Adds all constraints of the flow condition identified by `Token` and all
210   /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
211   /// to track tokens of flow conditions that were already visited by recursive
212   /// calls.
213   void addTransitiveFlowConditionConstraints(
214       Atom Token, llvm::SetVector<const Formula *> &Constraints,
215       llvm::DenseSet<Atom> &VisitedTokens);
216 
217   /// Returns true if the solver is able to prove that there is no satisfying
218   /// assignment for `Constraints`
219   bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
220     return querySolver(std::move(Constraints)).getStatus() ==
221            Solver::Result::Status::Unsatisfiable;
222   }
223 
224   std::unique_ptr<Solver> S;
225   std::unique_ptr<Arena> A;
226 
227   // Maps from program declarations and statements to storage locations that are
228   // assigned to them. These assignments are global (aggregated across all basic
229   // blocks) and are used to produce stable storage locations when the same
230   // basic blocks are evaluated multiple times. The storage locations that are
231   // in scope for a particular basic block are stored in `Environment`.
232   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
233   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
234 
235   // Null pointer values, keyed by the canonical pointee type.
236   //
237   // FIXME: The pointer values are indexed by the pointee types which are
238   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
239   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
240   // field.
241   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
242       NullPointerVals;
243 
244   Options Opts;
245 
246   // Flow conditions are tracked symbolically: each unique flow condition is
247   // associated with a fresh symbolic variable (token), bound to the clause that
248   // defines the flow condition. Conceptually, each binding corresponds to an
249   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
250   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
251   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
252   // the `FlowConditionConstraints` map, keyed by the token of the flow
253   // condition.
254   //
255   // Flow conditions depend on other flow conditions if they are created using
256   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
257   // dependencies is stored in the `FlowConditionDeps` map.
258   llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
259   llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
260 
261   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
262 
263   // Fields modeled by environments covered by this context.
264   FieldSet ModeledFields;
265 
266   std::unique_ptr<Logger> LogOwner; // If created via flags.
267 };
268 
269 } // namespace dataflow
270 } // namespace clang
271 
272 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
273