1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17 
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/Expr.h"
20 #include "clang/AST/TypeOrdering.h"
21 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
22 #include "clang/Analysis/FlowSensitive/Solver.h"
23 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
24 #include "clang/Analysis/FlowSensitive/Value.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/Support/Compiler.h"
28 #include <cassert>
29 #include <memory>
30 #include <optional>
31 #include <type_traits>
32 #include <utility>
33 #include <vector>
34 
35 namespace clang {
36 namespace dataflow {
37 
38 /// Skip past nodes that the CFG does not emit. These nodes are invisible to
39 /// flow-sensitive analysis, and should be ignored as they will effectively not
40 /// exist.
41 ///
42 ///   * `ParenExpr` - The CFG takes the operator precedence into account, but
43 ///   otherwise omits the node afterwards.
44 ///
45 ///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
46 ///   destructors and then omit the node.
47 ///
48 const Expr &ignoreCFGOmittedNodes(const Expr &E);
49 const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
50 
51 /// Returns the set of all fields in the type.
52 llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type);
53 
54 struct ContextSensitiveOptions {
55   /// The maximum depth to analyze. A value of zero is equivalent to disabling
56   /// context-sensitive analysis entirely.
57   unsigned Depth = 2;
58 };
59 
60 /// Owns objects that encompass the state of a program and stores context that
61 /// is used during dataflow analysis.
62 class DataflowAnalysisContext {
63 public:
64   struct Options {
65     /// Options for analyzing function bodies when present in the translation
66     /// unit, or empty to disable context-sensitive analysis. Note that this is
67     /// fundamentally limited: some constructs, such as recursion, are
68     /// explicitly unsupported.
69     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
70   };
71 
72   /// Constructs a dataflow analysis context.
73   ///
74   /// Requirements:
75   ///
76   ///  `S` must not be null.
77   DataflowAnalysisContext(std::unique_ptr<Solver> S,
78                           Options Opts = Options{
79                               /*ContextSensitiveOpts=*/std::nullopt})
80       : S(std::move(S)), TrueVal(createAtomicBoolValue()),
81         FalseVal(createAtomicBoolValue()), Opts(Opts) {
82     assert(this->S != nullptr);
83   }
84 
85   /// Takes ownership of `Loc` and returns a reference to it.
86   ///
87   /// Requirements:
88   ///
89   ///  `Loc` must not be null.
90   template <typename T>
91   std::enable_if_t<std::is_base_of<StorageLocation, T>::value, T &>
92   takeOwnership(std::unique_ptr<T> Loc) {
93     assert(Loc != nullptr);
94     Locs.push_back(std::move(Loc));
95     return *cast<T>(Locs.back().get());
96   }
97 
98   /// Takes ownership of `Val` and returns a reference to it.
99   ///
100   /// Requirements:
101   ///
102   ///  `Val` must not be null.
103   template <typename T>
104   std::enable_if_t<std::is_base_of<Value, T>::value, T &>
105   takeOwnership(std::unique_ptr<T> Val) {
106     assert(Val != nullptr);
107     Vals.push_back(std::move(Val));
108     return *cast<T>(Vals.back().get());
109   }
110 
111   /// Returns a new storage location appropriate for `Type`.
112   ///
113   /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
114   StorageLocation &createStorageLocation(QualType Type);
115 
116   /// Returns a stable storage location for `D`.
117   StorageLocation &getStableStorageLocation(const VarDecl &D);
118 
119   /// Returns a stable storage location for `E`.
120   StorageLocation &getStableStorageLocation(const Expr &E);
121 
122   /// Assigns `Loc` as the storage location of `D`.
123   ///
124   /// Requirements:
125   ///
126   ///  `D` must not be assigned a storage location.
127   void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
128     assert(DeclToLoc.find(&D) == DeclToLoc.end());
129     DeclToLoc[&D] = &Loc;
130   }
131 
132   /// Returns the storage location assigned to `D` or null if `D` has no
133   /// assigned storage location.
134   StorageLocation *getStorageLocation(const ValueDecl &D) const {
135     auto It = DeclToLoc.find(&D);
136     return It == DeclToLoc.end() ? nullptr : It->second;
137   }
138 
139   /// Assigns `Loc` as the storage location of `E`.
140   ///
141   /// Requirements:
142   ///
143   ///  `E` must not be assigned a storage location.
144   void setStorageLocation(const Expr &E, StorageLocation &Loc) {
145     const Expr &CanonE = ignoreCFGOmittedNodes(E);
146     assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
147     ExprToLoc[&CanonE] = &Loc;
148   }
149 
150   /// Returns the storage location assigned to `E` or null if `E` has no
151   /// assigned storage location.
152   StorageLocation *getStorageLocation(const Expr &E) const {
153     auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
154     return It == ExprToLoc.end() ? nullptr : It->second;
155   }
156 
157   /// Returns a pointer value that represents a null pointer. Calls with
158   /// `PointeeType` that are canonically equivalent will return the same result.
159   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
160   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
161 
162   /// Returns a symbolic boolean value that models a boolean literal equal to
163   /// `Value`.
164   AtomicBoolValue &getBoolLiteralValue(bool Value) const {
165     return Value ? TrueVal : FalseVal;
166   }
167 
168   /// Creates an atomic boolean value.
169   AtomicBoolValue &createAtomicBoolValue() {
170     return takeOwnership(std::make_unique<AtomicBoolValue>());
171   }
172 
173   /// Creates a Top value for booleans. Each instance is unique and can be
174   /// assigned a distinct truth value during solving.
175   ///
176   /// FIXME: `Top iff Top` is true when both Tops are identical (by pointer
177   /// equality), but not when they are distinct values. We should improve the
178   /// implementation so that `Top iff Top` has a consistent meaning, regardless
179   /// of the identity of `Top`. Moreover, I think the meaning should be
180   /// `false`.
181   TopBoolValue &createTopBoolValue() {
182     return takeOwnership(std::make_unique<TopBoolValue>());
183   }
184 
185   /// Returns a boolean value that represents the conjunction of `LHS` and
186   /// `RHS`. Subsequent calls with the same arguments, regardless of their
187   /// order, will return the same result. If the given boolean values represent
188   /// the same value, the result will be the value itself.
189   BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS);
190 
191   /// Returns a boolean value that represents the disjunction of `LHS` and
192   /// `RHS`. Subsequent calls with the same arguments, regardless of their
193   /// order, will return the same result. If the given boolean values represent
194   /// the same value, the result will be the value itself.
195   BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS);
196 
197   /// Returns a boolean value that represents the negation of `Val`. Subsequent
198   /// calls with the same argument will return the same result.
199   BoolValue &getOrCreateNegation(BoolValue &Val);
200 
201   /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls
202   /// with the same arguments, will return the same result. If the given boolean
203   /// values represent the same value, the result will be a value that
204   /// represents the true boolean literal.
205   BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS);
206 
207   /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls
208   /// with the same arguments, regardless of their order, will return the same
209   /// result. If the given boolean values represent the same value, the result
210   /// will be a value that represents the true boolean literal.
211   BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS);
212 
213   /// Creates a fresh flow condition and returns a token that identifies it. The
214   /// token can be used to perform various operations on the flow condition such
215   /// as adding constraints to it, forking it, joining it with another flow
216   /// condition, or checking implications.
217   AtomicBoolValue &makeFlowConditionToken();
218 
219   /// Adds `Constraint` to the flow condition identified by `Token`.
220   void addFlowConditionConstraint(AtomicBoolValue &Token,
221                                   BoolValue &Constraint);
222 
223   /// Creates a new flow condition with the same constraints as the flow
224   /// condition identified by `Token` and returns its token.
225   AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token);
226 
227   /// Creates a new flow condition that represents the disjunction of the flow
228   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
229   /// token.
230   AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken,
231                                       AtomicBoolValue &SecondToken);
232 
233   // FIXME: This function returns the flow condition expressed directly as its
234   // constraints: (C1 AND C2 AND ...). This differs from the general approach in
235   // the framework where a flow condition is represented as a token (an atomic
236   // boolean) with dependencies and constraints tracked in `FlowConditionDeps`
237   // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...).
238   // Consider if we should make the representation of flow condition consistent,
239   // returning an atomic boolean token with separate constraints instead.
240   //
241   /// Builds and returns the logical formula defining the flow condition
242   /// identified by `Token`. If a value in the formula is present as a key in
243   /// `Substitutions`, it will be substituted with the value it maps to.
244   /// As an example, say we have flow condition tokens FC1, FC2, FC3 and
245   /// FlowConditionConstraints: { FC1: C1,
246   ///                             FC2: C2,
247   ///                             FC3: (FC1 v FC2) ^ C3 }
248   /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value
249   /// corresponding to (C1' v C2) ^ C3.
250   BoolValue &buildAndSubstituteFlowCondition(
251       AtomicBoolValue &Token,
252       llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions);
253 
254   /// Returns true if and only if the constraints of the flow condition
255   /// identified by `Token` imply that `Val` is true.
256   bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val);
257 
258   /// Returns true if and only if the constraints of the flow condition
259   /// identified by `Token` are always true.
260   bool flowConditionIsTautology(AtomicBoolValue &Token);
261 
262   /// Returns true if `Val1` is equivalent to `Val2`.
263   /// Note: This function doesn't take into account constraints on `Val1` and
264   /// `Val2` imposed by the flow condition.
265   bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2);
266 
267   LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token);
268 
269   /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise,
270   /// returns null.
271   const ControlFlowContext *getControlFlowContext(const FunctionDecl *F);
272 
273   const Options &getOptions() { return Opts; }
274 
275 private:
276   friend class Environment;
277 
278   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
279     static QualType getEmptyKey() {
280       // Allow a NULL `QualType` by using a different value as the empty key.
281       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
282     }
283 
284     using DenseMapInfo::getHashValue;
285     using DenseMapInfo::getTombstoneKey;
286     using DenseMapInfo::isEqual;
287   };
288 
289   // Extends the set of modeled field declarations.
290   void addModeledFields(const llvm::DenseSet<const FieldDecl *> &Fields);
291 
292   /// Returns the fields of `Type`, limited to the set of fields modeled by this
293   /// context.
294   llvm::DenseSet<const FieldDecl *> getReferencedFields(QualType Type);
295 
296   /// Adds all constraints of the flow condition identified by `Token` and all
297   /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
298   /// to track tokens of flow conditions that were already visited by recursive
299   /// calls.
300   void addTransitiveFlowConditionConstraints(
301       AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints,
302       llvm::DenseSet<AtomicBoolValue *> &VisitedTokens);
303 
304   /// Returns the outcome of satisfiability checking on `Constraints`.
305   /// Possible outcomes are:
306   /// - `Satisfiable`: A satisfying assignment exists and is returned.
307   /// - `Unsatisfiable`: A satisfying assignment does not exist.
308   /// - `TimedOut`: The search for a satisfying assignment was not completed.
309   Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints);
310 
311   /// Returns true if the solver is able to prove that there is no satisfying
312   /// assignment for `Constraints`
313   bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) {
314     return querySolver(std::move(Constraints)).getStatus() ==
315            Solver::Result::Status::Unsatisfiable;
316   }
317 
318   /// Returns a boolean value as a result of substituting `Val` and its sub
319   /// values based on entries in `SubstitutionsCache`. Intermediate results are
320   /// stored in `SubstitutionsCache` to avoid reprocessing values that have
321   /// already been visited.
322   BoolValue &substituteBoolValue(
323       BoolValue &Val,
324       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
325 
326   /// Builds and returns the logical formula defining the flow condition
327   /// identified by `Token`, sub values may be substituted based on entries in
328   /// `SubstitutionsCache`. Intermediate results are stored in
329   /// `SubstitutionsCache` to avoid reprocessing values that have already been
330   /// visited.
331   BoolValue &buildAndSubstituteFlowConditionWithCache(
332       AtomicBoolValue &Token,
333       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
334 
335   std::unique_ptr<Solver> S;
336 
337   // Storage for the state of a program.
338   std::vector<std::unique_ptr<StorageLocation>> Locs;
339   std::vector<std::unique_ptr<Value>> Vals;
340 
341   // Maps from program declarations and statements to storage locations that are
342   // assigned to them. These assignments are global (aggregated across all basic
343   // blocks) and are used to produce stable storage locations when the same
344   // basic blocks are evaluated multiple times. The storage locations that are
345   // in scope for a particular basic block are stored in `Environment`.
346   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
347   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
348 
349   // Null pointer values, keyed by the canonical pointee type.
350   //
351   // FIXME: The pointer values are indexed by the pointee types which are
352   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
353   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
354   // field.
355   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
356       NullPointerVals;
357 
358   AtomicBoolValue &TrueVal;
359   AtomicBoolValue &FalseVal;
360 
361   Options Opts;
362 
363   // Indices that are used to avoid recreating the same composite boolean
364   // values.
365   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *>
366       ConjunctionVals;
367   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *>
368       DisjunctionVals;
369   llvm::DenseMap<BoolValue *, NegationValue *> NegationVals;
370   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *>
371       ImplicationVals;
372   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *>
373       BiconditionalVals;
374 
375   // Flow conditions are tracked symbolically: each unique flow condition is
376   // associated with a fresh symbolic variable (token), bound to the clause that
377   // defines the flow condition. Conceptually, each binding corresponds to an
378   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
379   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
380   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
381   // the `FlowConditionConstraints` map, keyed by the token of the flow
382   // condition.
383   //
384   // Flow conditions depend on other flow conditions if they are created using
385   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
386   // dependencies is stored in the `FlowConditionDeps` map.
387   llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>>
388       FlowConditionDeps;
389   llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints;
390 
391   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
392 
393   // Fields modeled by environments covered by this context.
394   llvm::DenseSet<const FieldDecl *> ModeledFields;
395 };
396 
397 } // namespace dataflow
398 } // namespace clang
399 
400 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
401