1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17 
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/Expr.h"
20 #include "clang/AST/TypeOrdering.h"
21 #include "clang/Analysis/FlowSensitive/Solver.h"
22 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
23 #include "clang/Analysis/FlowSensitive/Value.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/Support/Compiler.h"
27 #include <cassert>
28 #include <memory>
29 #include <type_traits>
30 #include <utility>
31 #include <vector>
32 
33 namespace clang {
34 namespace dataflow {
35 
/// Skip past nodes that the CFG does not emit. These nodes are invisible to
/// flow-sensitive analysis, and should be ignored as they will effectively not
/// exist.
///
///   * `ParenExpr` - The CFG takes the operator precedence into account, but
///   otherwise omits the node afterwards.
///
///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
///   destructors and then omit the node.
///
/// Within this header, `DataflowAnalysisContext` applies this function to an
/// expression before using its address as a key when storing or looking up
/// the storage location assigned to that expression.
const Expr &ignoreCFGOmittedNodes(const Expr &E);
/// Overload of the function above that accepts a statement.
// NOTE(review): presumably applies the same skipping when `S` is an expression
// and returns any other statement unchanged -- confirm against the definition,
// which is not part of this header.
const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
48 
/// Returns the set of all fields in the type.
///
/// The result is returned by value as a set of `FieldDecl` pointers.
// NOTE(review): whether fields inherited from base classes are included cannot
// be determined from this declaration -- confirm against the definition.
llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type);
51 
52 /// Owns objects that encompass the state of a program and stores context that
53 /// is used during dataflow analysis.
54 class DataflowAnalysisContext {
55 public:
56   /// Constructs a dataflow analysis context.
57   ///
58   /// Requirements:
59   ///
60   ///  `S` must not be null.
61   DataflowAnalysisContext(std::unique_ptr<Solver> S)
62       : S(std::move(S)), TrueVal(createAtomicBoolValue()),
63         FalseVal(createAtomicBoolValue()) {
64     assert(this->S != nullptr);
65   }
66 
67   /// Takes ownership of `Loc` and returns a reference to it.
68   ///
69   /// Requirements:
70   ///
71   ///  `Loc` must not be null.
72   template <typename T>
73   typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type
74   takeOwnership(std::unique_ptr<T> Loc) {
75     assert(Loc != nullptr);
76     Locs.push_back(std::move(Loc));
77     return *cast<T>(Locs.back().get());
78   }
79 
80   /// Takes ownership of `Val` and returns a reference to it.
81   ///
82   /// Requirements:
83   ///
84   ///  `Val` must not be null.
85   template <typename T>
86   typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type
87   takeOwnership(std::unique_ptr<T> Val) {
88     assert(Val != nullptr);
89     Vals.push_back(std::move(Val));
90     return *cast<T>(Vals.back().get());
91   }
92 
93   /// Returns a stable storage location appropriate for `Type`.
94   ///
95   /// Requirements:
96   ///
97   ///  `Type` must not be null.
98   StorageLocation &getStableStorageLocation(QualType Type);
99 
100   /// Returns a stable storage location for `D`.
101   StorageLocation &getStableStorageLocation(const VarDecl &D);
102 
103   /// Returns a stable storage location for `E`.
104   StorageLocation &getStableStorageLocation(const Expr &E);
105 
106   /// Assigns `Loc` as the storage location of `D`.
107   ///
108   /// Requirements:
109   ///
110   ///  `D` must not be assigned a storage location.
111   void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
112     assert(DeclToLoc.find(&D) == DeclToLoc.end());
113     DeclToLoc[&D] = &Loc;
114   }
115 
116   /// Returns the storage location assigned to `D` or null if `D` has no
117   /// assigned storage location.
118   StorageLocation *getStorageLocation(const ValueDecl &D) const {
119     auto It = DeclToLoc.find(&D);
120     return It == DeclToLoc.end() ? nullptr : It->second;
121   }
122 
123   /// Assigns `Loc` as the storage location of `E`.
124   ///
125   /// Requirements:
126   ///
127   ///  `E` must not be assigned a storage location.
128   void setStorageLocation(const Expr &E, StorageLocation &Loc) {
129     const Expr &CanonE = ignoreCFGOmittedNodes(E);
130     assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
131     ExprToLoc[&CanonE] = &Loc;
132   }
133 
134   /// Returns the storage location assigned to `E` or null if `E` has no
135   /// assigned storage location.
136   StorageLocation *getStorageLocation(const Expr &E) const {
137     auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
138     return It == ExprToLoc.end() ? nullptr : It->second;
139   }
140 
141   /// Assigns `Loc` as the storage location of the `this` pointee.
142   ///
143   /// Requirements:
144   ///
145   ///  The `this` pointee must not be assigned a storage location.
146   void setThisPointeeStorageLocation(StorageLocation &Loc) {
147     assert(ThisPointeeLoc == nullptr);
148     ThisPointeeLoc = &Loc;
149   }
150 
151   /// Returns the storage location assigned to the `this` pointee or null if the
152   /// `this` pointee has no assigned storage location.
153   StorageLocation *getThisPointeeStorageLocation() const {
154     return ThisPointeeLoc;
155   }
156 
157   /// Returns a pointer value that represents a null pointer. Calls with
158   /// `PointeeType` that are canonically equivalent will return the same result.
159   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
160   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
161 
162   /// Returns a symbolic boolean value that models a boolean literal equal to
163   /// `Value`.
164   AtomicBoolValue &getBoolLiteralValue(bool Value) const {
165     return Value ? TrueVal : FalseVal;
166   }
167 
168   /// Creates an atomic boolean value.
169   AtomicBoolValue &createAtomicBoolValue() {
170     return takeOwnership(std::make_unique<AtomicBoolValue>());
171   }
172 
173   /// Returns a boolean value that represents the conjunction of `LHS` and
174   /// `RHS`. Subsequent calls with the same arguments, regardless of their
175   /// order, will return the same result. If the given boolean values represent
176   /// the same value, the result will be the value itself.
177   BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS);
178 
179   /// Returns a boolean value that represents the disjunction of `LHS` and
180   /// `RHS`. Subsequent calls with the same arguments, regardless of their
181   /// order, will return the same result. If the given boolean values represent
182   /// the same value, the result will be the value itself.
183   BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS);
184 
185   /// Returns a boolean value that represents the negation of `Val`. Subsequent
186   /// calls with the same argument will return the same result.
187   BoolValue &getOrCreateNegation(BoolValue &Val);
188 
189   /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls
190   /// with the same arguments, will return the same result. If the given boolean
191   /// values represent the same value, the result will be a value that
192   /// represents the true boolean literal.
193   BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS);
194 
195   /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls
196   /// with the same arguments, regardless of their order, will return the same
197   /// result. If the given boolean values represent the same value, the result
198   /// will be a value that represents the true boolean literal.
199   BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS);
200 
201   /// Creates a fresh flow condition and returns a token that identifies it. The
202   /// token can be used to perform various operations on the flow condition such
203   /// as adding constraints to it, forking it, joining it with another flow
204   /// condition, or checking implications.
205   AtomicBoolValue &makeFlowConditionToken();
206 
207   /// Adds `Constraint` to the flow condition identified by `Token`.
208   void addFlowConditionConstraint(AtomicBoolValue &Token,
209                                   BoolValue &Constraint);
210 
211   /// Creates a new flow condition with the same constraints as the flow
212   /// condition identified by `Token` and returns its token.
213   AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token);
214 
215   /// Creates a new flow condition that represents the disjunction of the flow
216   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
217   /// token.
218   AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken,
219                                       AtomicBoolValue &SecondToken);
220 
221   // FIXME: This function returns the flow condition expressed directly as its
222   // constraints: (C1 AND C2 AND ...). This differs from the general approach in
223   // the framework where a flow condition is represented as a token (an atomic
224   // boolean) with dependencies and constraints tracked in `FlowConditionDeps`
225   // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...).
226   // Consider if we should make the representation of flow condition consistent,
227   // returning an atomic boolean token with separate constraints instead.
228   //
229   /// Builds and returns the logical formula defining the flow condition
230   /// identified by `Token`. If a value in the formula is present as a key in
231   /// `Substitutions`, it will be substituted with the value it maps to.
232   /// As an example, say we have flow condition tokens FC1, FC2, FC3 and
233   /// FlowConditionConstraints: { FC1: C1,
234   ///                             FC2: C2,
235   ///                             FC3: (FC1 v FC2) ^ C3 }
236   /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value
237   /// corresponding to (C1' v C2) ^ C3.
238   BoolValue &buildAndSubstituteFlowCondition(
239       AtomicBoolValue &Token,
240       llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions);
241 
242   /// Returns true if and only if the constraints of the flow condition
243   /// identified by `Token` imply that `Val` is true.
244   bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val);
245 
246   /// Returns true if and only if the constraints of the flow condition
247   /// identified by `Token` are always true.
248   bool flowConditionIsTautology(AtomicBoolValue &Token);
249 
250   /// Returns true if `Val1` is equivalent to `Val2`.
251   /// Note: This function doesn't take into account constraints on `Val1` and
252   /// `Val2` imposed by the flow condition.
253   bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2);
254 
255   LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token);
256 
257 private:
258   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
259     static QualType getEmptyKey() {
260       // Allow a NULL `QualType` by using a different value as the empty key.
261       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
262     }
263 
264     using DenseMapInfo::getHashValue;
265     using DenseMapInfo::getTombstoneKey;
266     using DenseMapInfo::isEqual;
267   };
268 
269   /// Adds all constraints of the flow condition identified by `Token` and all
270   /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
271   /// to track tokens of flow conditions that were already visited by recursive
272   /// calls.
273   void addTransitiveFlowConditionConstraints(
274       AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints,
275       llvm::DenseSet<AtomicBoolValue *> &VisitedTokens);
276 
277   /// Returns the outcome of satisfiability checking on `Constraints`.
278   /// Possible outcomes are:
279   /// - `Satisfiable`: A satisfying assignment exists and is returned.
280   /// - `Unsatisfiable`: A satisfying assignment does not exist.
281   /// - `TimedOut`: The search for a satisfying assignment was not completed.
282   Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints);
283 
284   /// Returns true if the solver is able to prove that there is no satisfying
285   /// assignment for `Constraints`
286   bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) {
287     return querySolver(std::move(Constraints)).getStatus() ==
288            Solver::Result::Status::Unsatisfiable;
289   }
290 
291   /// Returns a boolean value as a result of substituting `Val` and its sub
292   /// values based on entries in `SubstitutionsCache`. Intermediate results are
293   /// stored in `SubstitutionsCache` to avoid reprocessing values that have
294   /// already been visited.
295   BoolValue &substituteBoolValue(
296       BoolValue &Val,
297       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
298 
299   /// Builds and returns the logical formula defining the flow condition
300   /// identified by `Token`, sub values may be substituted based on entries in
301   /// `SubstitutionsCache`. Intermediate results are stored in
302   /// `SubstitutionsCache` to avoid reprocessing values that have already been
303   /// visited.
304   BoolValue &buildAndSubstituteFlowConditionWithCache(
305       AtomicBoolValue &Token,
306       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
307 
308   std::unique_ptr<Solver> S;
309 
310   // Storage for the state of a program.
311   std::vector<std::unique_ptr<StorageLocation>> Locs;
312   std::vector<std::unique_ptr<Value>> Vals;
313 
314   // Maps from program declarations and statements to storage locations that are
315   // assigned to them. These assignments are global (aggregated across all basic
316   // blocks) and are used to produce stable storage locations when the same
317   // basic blocks are evaluated multiple times. The storage locations that are
318   // in scope for a particular basic block are stored in `Environment`.
319   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
320   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
321 
322   StorageLocation *ThisPointeeLoc = nullptr;
323 
324   // Null pointer values, keyed by the canonical pointee type.
325   //
326   // FIXME: The pointer values are indexed by the pointee types which are
327   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
328   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
329   // field.
330   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
331       NullPointerVals;
332 
333   AtomicBoolValue &TrueVal;
334   AtomicBoolValue &FalseVal;
335 
336   // Indices that are used to avoid recreating the same composite boolean
337   // values.
338   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *>
339       ConjunctionVals;
340   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *>
341       DisjunctionVals;
342   llvm::DenseMap<BoolValue *, NegationValue *> NegationVals;
343   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *>
344       ImplicationVals;
345   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *>
346       BiconditionalVals;
347 
348   // Flow conditions are tracked symbolically: each unique flow condition is
349   // associated with a fresh symbolic variable (token), bound to the clause that
350   // defines the flow condition. Conceptually, each binding corresponds to an
351   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
352   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
353   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
354   // the `FlowConditionConstraints` map, keyed by the token of the flow
355   // condition.
356   //
357   // Flow conditions depend on other flow conditions if they are created using
358   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
359   // dependencies is stored in the `FlowConditionDeps` map.
360   llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>>
361       FlowConditionDeps;
362   llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints;
363 };
364 
365 } // namespace dataflow
366 } // namespace clang
367 
368 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
369