1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Expr.h" 20 #include "clang/AST/TypeOrdering.h" 21 #include "clang/Analysis/FlowSensitive/Arena.h" 22 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 23 #include "clang/Analysis/FlowSensitive/Solver.h" 24 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 25 #include "clang/Analysis/FlowSensitive/Value.h" 26 #include "llvm/ADT/DenseMap.h" 27 #include "llvm/ADT/DenseSet.h" 28 #include "llvm/ADT/SetVector.h" 29 #include "llvm/Support/Compiler.h" 30 #include <cassert> 31 #include <memory> 32 #include <optional> 33 #include <type_traits> 34 #include <utility> 35 #include <vector> 36 37 namespace clang { 38 namespace dataflow { 39 class Logger; 40 41 /// Skip past nodes that the CFG does not emit. These nodes are invisible to 42 /// flow-sensitive analysis, and should be ignored as they will effectively not 43 /// exist. 44 /// 45 /// * `ParenExpr` - The CFG takes the operator precedence into account, but 46 /// otherwise omits the node afterwards. 47 /// 48 /// * `ExprWithCleanups` - The CFG will generate the appropriate calls to 49 /// destructors and then omit the node. 50 /// 51 const Expr &ignoreCFGOmittedNodes(const Expr &E); 52 const Stmt &ignoreCFGOmittedNodes(const Stmt &S); 53 54 /// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic 55 /// iteration order. 56 using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>; 57 58 /// Returns the set of all fields in the type. 59 FieldSet getObjectFields(QualType Type); 60 61 struct ContextSensitiveOptions { 62 /// The maximum depth to analyze. A value of zero is equivalent to disabling 63 /// context-sensitive analysis entirely. 64 unsigned Depth = 2; 65 }; 66 67 /// Owns objects that encompass the state of a program and stores context that 68 /// is used during dataflow analysis. 69 class DataflowAnalysisContext { 70 public: 71 struct Options { 72 /// Options for analyzing function bodies when present in the translation 73 /// unit, or empty to disable context-sensitive analysis. Note that this is 74 /// fundamentally limited: some constructs, such as recursion, are 75 /// explicitly unsupported. 76 std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 77 78 /// If provided, analysis details will be recorded here. 79 /// (This is always non-null within an AnalysisContext, the framework 80 /// provides a fallback no-op logger). 81 Logger *Log = nullptr; 82 }; 83 84 /// Constructs a dataflow analysis context. 85 /// 86 /// Requirements: 87 /// 88 /// `S` must not be null. 89 DataflowAnalysisContext(std::unique_ptr<Solver> S, 90 Options Opts = Options{ 91 /*ContextSensitiveOpts=*/std::nullopt, 92 /*Logger=*/nullptr}); 93 ~DataflowAnalysisContext(); 94 95 /// Returns a new storage location appropriate for `Type`. 96 /// 97 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 98 StorageLocation &createStorageLocation(QualType Type); 99 100 /// Returns a stable storage location for `D`. 101 StorageLocation &getStableStorageLocation(const VarDecl &D); 102 103 /// Returns a stable storage location for `E`. 104 StorageLocation &getStableStorageLocation(const Expr &E); 105 106 /// Assigns `Loc` as the storage location of `D`. 107 /// 108 /// Requirements: 109 /// 110 /// `D` must not be assigned a storage location. 111 void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 112 assert(!DeclToLoc.contains(&D)); 113 DeclToLoc[&D] = &Loc; 114 } 115 116 /// Returns the storage location assigned to `D` or null if `D` has no 117 /// assigned storage location. 118 StorageLocation *getStorageLocation(const ValueDecl &D) const { 119 return DeclToLoc.lookup(&D); 120 } 121 122 /// Assigns `Loc` as the storage location of `E`. 123 /// 124 /// Requirements: 125 /// 126 /// `E` must not be assigned a storage location. 127 void setStorageLocation(const Expr &E, StorageLocation &Loc) { 128 const Expr &CanonE = ignoreCFGOmittedNodes(E); 129 assert(!ExprToLoc.contains(&CanonE)); 130 ExprToLoc[&CanonE] = &Loc; 131 } 132 133 /// Returns the storage location assigned to `E` or null if `E` has no 134 /// assigned storage location. 135 StorageLocation *getStorageLocation(const Expr &E) const { 136 return ExprToLoc.lookup(&ignoreCFGOmittedNodes(E)); 137 } 138 139 /// Returns a pointer value that represents a null pointer. Calls with 140 /// `PointeeType` that are canonically equivalent will return the same result. 141 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 142 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 143 144 /// Adds `Constraint` to the flow condition identified by `Token`. 145 void addFlowConditionConstraint(Atom Token, const Formula &Constraint); 146 147 /// Creates a new flow condition with the same constraints as the flow 148 /// condition identified by `Token` and returns its token. 149 Atom forkFlowCondition(Atom Token); 150 151 /// Creates a new flow condition that represents the disjunction of the flow 152 /// conditions identified by `FirstToken` and `SecondToken`, and returns its 153 /// token. 154 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); 155 156 /// Returns true if and only if the constraints of the flow condition 157 /// identified by `Token` imply that `Val` is true. 158 bool flowConditionImplies(Atom Token, const Formula &); 159 160 /// Returns true if and only if the constraints of the flow condition 161 /// identified by `Token` are always true. 162 bool flowConditionIsTautology(Atom Token); 163 164 /// Returns true if `Val1` is equivalent to `Val2`. 165 /// Note: This function doesn't take into account constraints on `Val1` and 166 /// `Val2` imposed by the flow condition. 167 bool equivalentFormulas(const Formula &Val1, const Formula &Val2); 168 169 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, 170 llvm::raw_ostream &OS = llvm::dbgs()); 171 172 /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, 173 /// returns null. 174 const ControlFlowContext *getControlFlowContext(const FunctionDecl *F); 175 176 const Options &getOptions() { return Opts; } 177 178 Arena &arena() { return *A; } 179 180 /// Returns the outcome of satisfiability checking on `Constraints`. 181 /// 182 /// Flow conditions are not incorporated, so they may need to be manually 183 /// included in `Constraints` to provide contextually-accurate results, e.g. 184 /// if any definitions or relationships of the values in `Constraints` have 185 /// been stored in flow conditions. 186 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); 187 188 /// Returns the fields of `Type`, limited to the set of fields modeled by this 189 /// context. 190 FieldSet getModeledFields(QualType Type); 191 192 private: 193 friend class Environment; 194 195 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { 196 static QualType getEmptyKey() { 197 // Allow a NULL `QualType` by using a different value as the empty key. 198 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 199 } 200 201 using DenseMapInfo::getHashValue; 202 using DenseMapInfo::getTombstoneKey; 203 using DenseMapInfo::isEqual; 204 }; 205 206 // Extends the set of modeled field declarations. 207 void addModeledFields(const FieldSet &Fields); 208 209 /// Adds all constraints of the flow condition identified by `Token` and all 210 /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used 211 /// to track tokens of flow conditions that were already visited by recursive 212 /// calls. 213 void addTransitiveFlowConditionConstraints( 214 Atom Token, llvm::SetVector<const Formula *> &Constraints, 215 llvm::DenseSet<Atom> &VisitedTokens); 216 217 /// Returns true if the solver is able to prove that there is no satisfying 218 /// assignment for `Constraints` 219 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { 220 return querySolver(std::move(Constraints)).getStatus() == 221 Solver::Result::Status::Unsatisfiable; 222 } 223 224 std::unique_ptr<Solver> S; 225 std::unique_ptr<Arena> A; 226 227 // Maps from program declarations and statements to storage locations that are 228 // assigned to them. These assignments are global (aggregated across all basic 229 // blocks) and are used to produce stable storage locations when the same 230 // basic blocks are evaluated multiple times. The storage locations that are 231 // in scope for a particular basic block are stored in `Environment`. 232 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 233 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 234 235 // Null pointer values, keyed by the canonical pointee type. 236 // 237 // FIXME: The pointer values are indexed by the pointee types which are 238 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 239 // creating a type-independent `NullPointerValue` without a `PointeeLoc` 240 // field. 241 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 242 NullPointerVals; 243 244 Options Opts; 245 246 // Flow conditions are tracked symbolically: each unique flow condition is 247 // associated with a fresh symbolic variable (token), bound to the clause that 248 // defines the flow condition. Conceptually, each binding corresponds to an 249 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 250 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 251 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 252 // the `FlowConditionConstraints` map, keyed by the token of the flow 253 // condition. 254 // 255 // Flow conditions depend on other flow conditions if they are created using 256 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 257 // dependencies is stored in the `FlowConditionDeps` map. 258 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; 259 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; 260 261 llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts; 262 263 // Fields modeled by environments covered by this context. 264 FieldSet ModeledFields; 265 266 std::unique_ptr<Logger> LogOwner; // If created via flags. 267 }; 268 269 } // namespace dataflow 270 } // namespace clang 271 272 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 273