1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17 
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/Expr.h"
20 #include "clang/AST/TypeOrdering.h"
21 #include "clang/Analysis/FlowSensitive/Arena.h"
22 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
23 #include "clang/Analysis/FlowSensitive/Solver.h"
24 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
25 #include "clang/Analysis/FlowSensitive/Value.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/ADT/SetVector.h"
29 #include "llvm/Support/Compiler.h"
30 #include <cassert>
31 #include <memory>
32 #include <optional>
33 #include <type_traits>
34 #include <utility>
35 #include <vector>
36 
37 namespace clang {
38 namespace dataflow {
39 class Logger;
40 
41 /// Skip past nodes that the CFG does not emit. These nodes are invisible to
42 /// flow-sensitive analysis, and should be ignored as they will effectively not
43 /// exist.
44 ///
45 ///   * `ParenExpr` - The CFG takes the operator precedence into account, but
46 ///   otherwise omits the node afterwards.
47 ///
48 ///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
49 ///   destructors and then omit the node.
50 ///
51 const Expr &ignoreCFGOmittedNodes(const Expr &E);
52 const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
53 
54 /// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic
55 /// iteration order.
56 using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>;
57 
58 /// Returns the set of all fields in the type.
59 FieldSet getObjectFields(QualType Type);
60 
61 /// Returns whether `Fields` and `FieldLocs` contain the same fields.
62 bool containsSameFields(const FieldSet &Fields,
63                         const RecordStorageLocation::FieldToLoc &FieldLocs);
64 
65 struct ContextSensitiveOptions {
66   /// The maximum depth to analyze. A value of zero is equivalent to disabling
67   /// context-sensitive analysis entirely.
68   unsigned Depth = 2;
69 };
70 
71 /// Owns objects that encompass the state of a program and stores context that
72 /// is used during dataflow analysis.
73 class DataflowAnalysisContext {
74 public:
75   struct Options {
76     /// Options for analyzing function bodies when present in the translation
77     /// unit, or empty to disable context-sensitive analysis. Note that this is
78     /// fundamentally limited: some constructs, such as recursion, are
79     /// explicitly unsupported.
80     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
81 
82     /// If provided, analysis details will be recorded here.
83     /// (This is always non-null within an AnalysisContext, the framework
84     /// provides a fallback no-op logger).
85     Logger *Log = nullptr;
86   };
87 
88   /// Constructs a dataflow analysis context.
89   ///
90   /// Requirements:
91   ///
92   ///  `S` must not be null.
93   DataflowAnalysisContext(std::unique_ptr<Solver> S,
94                           Options Opts = Options{
95                               /*ContextSensitiveOpts=*/std::nullopt,
96                               /*Logger=*/nullptr});
97   ~DataflowAnalysisContext();
98 
99   /// Sets a callback that returns the names and types of the synthetic fields
100   /// to add to a `RecordStorageLocation` of a given type.
101   /// Typically, this is called from the constructor of a `DataflowAnalysis`
102   ///
103   /// To maintain the invariant that all `RecordStorageLocation`s of a given
104   /// type have the same fields:
105   /// *  The callback must always return the same result for a given type
106   /// *  `setSyntheticFieldCallback()` must be called before any
107   //     `RecordStorageLocation`s are created.
setSyntheticFieldCallback(std::function<llvm::StringMap<QualType> (QualType)> CB)108   void setSyntheticFieldCallback(
109       std::function<llvm::StringMap<QualType>(QualType)> CB) {
110     assert(!RecordStorageLocationCreated);
111     SyntheticFieldCallback = CB;
112   }
113 
114   /// Returns a new storage location appropriate for `Type`.
115   ///
116   /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
117   StorageLocation &createStorageLocation(QualType Type);
118 
119   /// Creates a `RecordStorageLocation` for the given type and with the given
120   /// fields.
121   ///
122   /// Requirements:
123   ///
124   ///  `FieldLocs` must contain exactly the fields returned by
125   ///  `getModeledFields(Type)`.
126   ///  `SyntheticFields` must contain exactly the fields returned by
127   ///  `getSyntheticFields(Type)`.
128   RecordStorageLocation &createRecordStorageLocation(
129       QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
130       RecordStorageLocation::SyntheticFieldMap SyntheticFields);
131 
132   /// Returns a stable storage location for `D`.
133   StorageLocation &getStableStorageLocation(const ValueDecl &D);
134 
135   /// Returns a stable storage location for `E`.
136   StorageLocation &getStableStorageLocation(const Expr &E);
137 
138   /// Returns a pointer value that represents a null pointer. Calls with
139   /// `PointeeType` that are canonically equivalent will return the same result.
140   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
141   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
142 
143   /// Adds `Constraint` to current and future flow conditions in this context.
144   ///
145   /// Invariants must contain only flow-insensitive information, i.e. facts that
146   /// are true on all paths through the program.
147   /// Information can be added eagerly (when analysis begins), or lazily (e.g.
148   /// when values are first used). The analysis must be careful that the same
149   /// information is added regardless of which order blocks are analyzed in.
150   void addInvariant(const Formula &Constraint);
151 
152   /// Adds `Constraint` to the flow condition identified by `Token`.
153   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
154 
155   /// Creates a new flow condition with the same constraints as the flow
156   /// condition identified by `Token` and returns its token.
157   Atom forkFlowCondition(Atom Token);
158 
159   /// Creates a new flow condition that represents the disjunction of the flow
160   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
161   /// token.
162   Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
163 
164   /// Returns true if the constraints of the flow condition identified by
165   /// `Token` imply that `F` is true.
166   /// Returns false if the flow condition does not imply `F` or if the solver
167   /// times out.
168   bool flowConditionImplies(Atom Token, const Formula &F);
169 
170   /// Returns true if the constraints of the flow condition identified by
171   /// `Token` still allow `F` to be true.
172   /// Returns false if the flow condition implies that `F` is false or if the
173   /// solver times out.
174   bool flowConditionAllows(Atom Token, const Formula &F);
175 
176   /// Returns true if `Val1` is equivalent to `Val2`.
177   /// Note: This function doesn't take into account constraints on `Val1` and
178   /// `Val2` imposed by the flow condition.
179   bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
180 
181   LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
182                                           llvm::raw_ostream &OS = llvm::dbgs());
183 
184   /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise,
185   /// returns null.
186   const ControlFlowContext *getControlFlowContext(const FunctionDecl *F);
187 
getOptions()188   const Options &getOptions() { return Opts; }
189 
arena()190   Arena &arena() { return *A; }
191 
192   /// Returns the outcome of satisfiability checking on `Constraints`.
193   ///
194   /// Flow conditions are not incorporated, so they may need to be manually
195   /// included in `Constraints` to provide contextually-accurate results, e.g.
196   /// if any definitions or relationships of the values in `Constraints` have
197   /// been stored in flow conditions.
198   Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
199 
200   /// Returns the fields of `Type`, limited to the set of fields modeled by this
201   /// context.
202   FieldSet getModeledFields(QualType Type);
203 
204   /// Returns the names and types of the synthetic fields for the given record
205   /// type.
getSyntheticFields(QualType Type)206   llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
207     assert(Type->isRecordType());
208     if (SyntheticFieldCallback)
209       return SyntheticFieldCallback(Type);
210     return {};
211   }
212 
213 private:
214   friend class Environment;
215 
216   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
getEmptyKeyNullableQualTypeDenseMapInfo217     static QualType getEmptyKey() {
218       // Allow a NULL `QualType` by using a different value as the empty key.
219       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
220     }
221 
222     using DenseMapInfo::getHashValue;
223     using DenseMapInfo::getTombstoneKey;
224     using DenseMapInfo::isEqual;
225   };
226 
227   // Extends the set of modeled field declarations.
228   void addModeledFields(const FieldSet &Fields);
229 
230   /// Adds all constraints of the flow condition identified by `Token` and all
231   /// of its transitive dependencies to `Constraints`.
232   void
233   addTransitiveFlowConditionConstraints(Atom Token,
234                                         llvm::SetVector<const Formula *> &Out);
235 
236   /// Returns true if the solver is able to prove that there is a satisfying
237   /// assignment for `Constraints`.
isSatisfiable(llvm::SetVector<const Formula * > Constraints)238   bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
239     return querySolver(std::move(Constraints)).getStatus() ==
240            Solver::Result::Status::Satisfiable;
241   }
242 
243   /// Returns true if the solver is able to prove that there is no satisfying
244   /// assignment for `Constraints`
isUnsatisfiable(llvm::SetVector<const Formula * > Constraints)245   bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
246     return querySolver(std::move(Constraints)).getStatus() ==
247            Solver::Result::Status::Unsatisfiable;
248   }
249 
250   std::unique_ptr<Solver> S;
251   std::unique_ptr<Arena> A;
252 
253   // Maps from program declarations and statements to storage locations that are
254   // assigned to them. These assignments are global (aggregated across all basic
255   // blocks) and are used to produce stable storage locations when the same
256   // basic blocks are evaluated multiple times. The storage locations that are
257   // in scope for a particular basic block are stored in `Environment`.
258   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
259   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
260 
261   // Null pointer values, keyed by the canonical pointee type.
262   //
263   // FIXME: The pointer values are indexed by the pointee types which are
264   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
265   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
266   // field.
267   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
268       NullPointerVals;
269 
270   Options Opts;
271 
272   // Flow conditions are tracked symbolically: each unique flow condition is
273   // associated with a fresh symbolic variable (token), bound to the clause that
274   // defines the flow condition. Conceptually, each binding corresponds to an
275   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
276   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
277   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
278   // the `FlowConditionConstraints` map, keyed by the token of the flow
279   // condition.
280   //
281   // Flow conditions depend on other flow conditions if they are created using
282   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
283   // dependencies is stored in the `FlowConditionDeps` map.
284   llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
285   llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
286   const Formula *Invariant = nullptr;
287 
288   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
289 
290   // Fields modeled by environments covered by this context.
291   FieldSet ModeledFields;
292 
293   std::unique_ptr<Logger> LogOwner; // If created via flags.
294 
295   std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
296 
297   /// Has any `RecordStorageLocation` been created yet?
298   bool RecordStorageLocationCreated = false;
299 };
300 
301 } // namespace dataflow
302 } // namespace clang
303 
304 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
305