1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Expr.h" 20 #include "clang/AST/TypeOrdering.h" 21 #include "clang/Analysis/FlowSensitive/Solver.h" 22 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 23 #include "clang/Analysis/FlowSensitive/Value.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/DenseSet.h" 26 #include "llvm/Support/Compiler.h" 27 #include <cassert> 28 #include <memory> 29 #include <type_traits> 30 #include <utility> 31 #include <vector> 32 33 namespace clang { 34 namespace dataflow { 35 36 /// Skip past nodes that the CFG does not emit. These nodes are invisible to 37 /// flow-sensitive analysis, and should be ignored as they will effectively not 38 /// exist. 39 /// 40 /// * `ParenExpr` - The CFG takes the operator precedence into account, but 41 /// otherwise omits the node afterwards. 42 /// 43 /// * `ExprWithCleanups` - The CFG will generate the appropriate calls to 44 /// destructors and then omit the node. 45 /// 46 const Expr &ignoreCFGOmittedNodes(const Expr &E); 47 const Stmt &ignoreCFGOmittedNodes(const Stmt &S); 48 49 /// Returns the set of all fields in the type. 50 llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type); 51 52 /// Owns objects that encompass the state of a program and stores context that 53 /// is used during dataflow analysis. 54 class DataflowAnalysisContext { 55 public: 56 /// Constructs a dataflow analysis context. 57 /// 58 /// Requirements: 59 /// 60 /// `S` must not be null. 61 DataflowAnalysisContext(std::unique_ptr<Solver> S) 62 : S(std::move(S)), TrueVal(createAtomicBoolValue()), 63 FalseVal(createAtomicBoolValue()) { 64 assert(this->S != nullptr); 65 } 66 67 /// Takes ownership of `Loc` and returns a reference to it. 68 /// 69 /// Requirements: 70 /// 71 /// `Loc` must not be null. 72 template <typename T> 73 typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type 74 takeOwnership(std::unique_ptr<T> Loc) { 75 assert(Loc != nullptr); 76 Locs.push_back(std::move(Loc)); 77 return *cast<T>(Locs.back().get()); 78 } 79 80 /// Takes ownership of `Val` and returns a reference to it. 81 /// 82 /// Requirements: 83 /// 84 /// `Val` must not be null. 85 template <typename T> 86 typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type 87 takeOwnership(std::unique_ptr<T> Val) { 88 assert(Val != nullptr); 89 Vals.push_back(std::move(Val)); 90 return *cast<T>(Vals.back().get()); 91 } 92 93 /// Returns a stable storage location appropriate for `Type`. 94 /// 95 /// Requirements: 96 /// 97 /// `Type` must not be null. 98 StorageLocation &getStableStorageLocation(QualType Type); 99 100 /// Returns a stable storage location for `D`. 101 StorageLocation &getStableStorageLocation(const VarDecl &D); 102 103 /// Returns a stable storage location for `E`. 104 StorageLocation &getStableStorageLocation(const Expr &E); 105 106 /// Assigns `Loc` as the storage location of `D`. 107 /// 108 /// Requirements: 109 /// 110 /// `D` must not be assigned a storage location. 111 void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 112 assert(DeclToLoc.find(&D) == DeclToLoc.end()); 113 DeclToLoc[&D] = &Loc; 114 } 115 116 /// Returns the storage location assigned to `D` or null if `D` has no 117 /// assigned storage location. 118 StorageLocation *getStorageLocation(const ValueDecl &D) const { 119 auto It = DeclToLoc.find(&D); 120 return It == DeclToLoc.end() ? nullptr : It->second; 121 } 122 123 /// Assigns `Loc` as the storage location of `E`. 124 /// 125 /// Requirements: 126 /// 127 /// `E` must not be assigned a storage location. 128 void setStorageLocation(const Expr &E, StorageLocation &Loc) { 129 const Expr &CanonE = ignoreCFGOmittedNodes(E); 130 assert(ExprToLoc.find(&CanonE) == ExprToLoc.end()); 131 ExprToLoc[&CanonE] = &Loc; 132 } 133 134 /// Returns the storage location assigned to `E` or null if `E` has no 135 /// assigned storage location. 136 StorageLocation *getStorageLocation(const Expr &E) const { 137 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 138 return It == ExprToLoc.end() ? nullptr : It->second; 139 } 140 141 /// Assigns `Loc` as the storage location of the `this` pointee. 142 /// 143 /// Requirements: 144 /// 145 /// The `this` pointee must not be assigned a storage location. 146 void setThisPointeeStorageLocation(StorageLocation &Loc) { 147 assert(ThisPointeeLoc == nullptr); 148 ThisPointeeLoc = &Loc; 149 } 150 151 /// Returns the storage location assigned to the `this` pointee or null if the 152 /// `this` pointee has no assigned storage location. 153 StorageLocation *getThisPointeeStorageLocation() const { 154 return ThisPointeeLoc; 155 } 156 157 /// Returns a pointer value that represents a null pointer. Calls with 158 /// `PointeeType` that are canonically equivalent will return the same result. 159 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 160 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 161 162 /// Returns a symbolic boolean value that models a boolean literal equal to 163 /// `Value`. 164 AtomicBoolValue &getBoolLiteralValue(bool Value) const { 165 return Value ? TrueVal : FalseVal; 166 } 167 168 /// Creates an atomic boolean value. 169 AtomicBoolValue &createAtomicBoolValue() { 170 return takeOwnership(std::make_unique<AtomicBoolValue>()); 171 } 172 173 /// Returns a boolean value that represents the conjunction of `LHS` and 174 /// `RHS`. Subsequent calls with the same arguments, regardless of their 175 /// order, will return the same result. If the given boolean values represent 176 /// the same value, the result will be the value itself. 177 BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS); 178 179 /// Returns a boolean value that represents the disjunction of `LHS` and 180 /// `RHS`. Subsequent calls with the same arguments, regardless of their 181 /// order, will return the same result. If the given boolean values represent 182 /// the same value, the result will be the value itself. 183 BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS); 184 185 /// Returns a boolean value that represents the negation of `Val`. Subsequent 186 /// calls with the same argument will return the same result. 187 BoolValue &getOrCreateNegation(BoolValue &Val); 188 189 /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls 190 /// with the same arguments, will return the same result. If the given boolean 191 /// values represent the same value, the result will be a value that 192 /// represents the true boolean literal. 193 BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS); 194 195 /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls 196 /// with the same arguments, regardless of their order, will return the same 197 /// result. If the given boolean values represent the same value, the result 198 /// will be a value that represents the true boolean literal. 199 BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS); 200 201 /// Creates a fresh flow condition and returns a token that identifies it. The 202 /// token can be used to perform various operations on the flow condition such 203 /// as adding constraints to it, forking it, joining it with another flow 204 /// condition, or checking implications. 205 AtomicBoolValue &makeFlowConditionToken(); 206 207 /// Adds `Constraint` to the flow condition identified by `Token`. 208 void addFlowConditionConstraint(AtomicBoolValue &Token, 209 BoolValue &Constraint); 210 211 /// Creates a new flow condition with the same constraints as the flow 212 /// condition identified by `Token` and returns its token. 213 AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token); 214 215 /// Creates a new flow condition that represents the disjunction of the flow 216 /// conditions identified by `FirstToken` and `SecondToken`, and returns its 217 /// token. 218 AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken, 219 AtomicBoolValue &SecondToken); 220 221 // FIXME: This function returns the flow condition expressed directly as its 222 // constraints: (C1 AND C2 AND ...). This differs from the general approach in 223 // the framework where a flow condition is represented as a token (an atomic 224 // boolean) with dependencies and constraints tracked in `FlowConditionDeps` 225 // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...). 226 // Consider if we should make the representation of flow condition consistent, 227 // returning an atomic boolean token with separate constraints instead. 228 // 229 /// Builds and returns the logical formula defining the flow condition 230 /// identified by `Token`. If a value in the formula is present as a key in 231 /// `Substitutions`, it will be substituted with the value it maps to. 232 /// As an example, say we have flow condition tokens FC1, FC2, FC3 and 233 /// FlowConditionConstraints: { FC1: C1, 234 /// FC2: C2, 235 /// FC3: (FC1 v FC2) ^ C3 } 236 /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value 237 /// corresponding to (C1' v C2) ^ C3. 238 BoolValue &buildAndSubstituteFlowCondition( 239 AtomicBoolValue &Token, 240 llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions); 241 242 /// Returns true if and only if the constraints of the flow condition 243 /// identified by `Token` imply that `Val` is true. 244 bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val); 245 246 /// Returns true if and only if the constraints of the flow condition 247 /// identified by `Token` are always true. 248 bool flowConditionIsTautology(AtomicBoolValue &Token); 249 250 /// Returns true if `Val1` is equivalent to `Val2`. 251 /// Note: This function doesn't take into account constraints on `Val1` and 252 /// `Val2` imposed by the flow condition. 253 bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2); 254 255 LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token); 256 257 private: 258 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { 259 static QualType getEmptyKey() { 260 // Allow a NULL `QualType` by using a different value as the empty key. 261 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 262 } 263 264 using DenseMapInfo::getHashValue; 265 using DenseMapInfo::getTombstoneKey; 266 using DenseMapInfo::isEqual; 267 }; 268 269 /// Adds all constraints of the flow condition identified by `Token` and all 270 /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used 271 /// to track tokens of flow conditions that were already visited by recursive 272 /// calls. 273 void addTransitiveFlowConditionConstraints( 274 AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, 275 llvm::DenseSet<AtomicBoolValue *> &VisitedTokens); 276 277 /// Returns the outcome of satisfiability checking on `Constraints`. 278 /// Possible outcomes are: 279 /// - `Satisfiable`: A satisfying assignment exists and is returned. 280 /// - `Unsatisfiable`: A satisfying assignment does not exist. 281 /// - `TimedOut`: The search for a satisfying assignment was not completed. 282 Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints); 283 284 /// Returns true if the solver is able to prove that there is no satisfying 285 /// assignment for `Constraints` 286 bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) { 287 return querySolver(std::move(Constraints)).getStatus() == 288 Solver::Result::Status::Unsatisfiable; 289 } 290 291 /// Returns a boolean value as a result of substituting `Val` and its sub 292 /// values based on entries in `SubstitutionsCache`. Intermediate results are 293 /// stored in `SubstitutionsCache` to avoid reprocessing values that have 294 /// already been visited. 295 BoolValue &substituteBoolValue( 296 BoolValue &Val, 297 llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); 298 299 /// Builds and returns the logical formula defining the flow condition 300 /// identified by `Token`, sub values may be substituted based on entries in 301 /// `SubstitutionsCache`. Intermediate results are stored in 302 /// `SubstitutionsCache` to avoid reprocessing values that have already been 303 /// visited. 304 BoolValue &buildAndSubstituteFlowConditionWithCache( 305 AtomicBoolValue &Token, 306 llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); 307 308 std::unique_ptr<Solver> S; 309 310 // Storage for the state of a program. 311 std::vector<std::unique_ptr<StorageLocation>> Locs; 312 std::vector<std::unique_ptr<Value>> Vals; 313 314 // Maps from program declarations and statements to storage locations that are 315 // assigned to them. These assignments are global (aggregated across all basic 316 // blocks) and are used to produce stable storage locations when the same 317 // basic blocks are evaluated multiple times. The storage locations that are 318 // in scope for a particular basic block are stored in `Environment`. 319 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 320 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 321 322 StorageLocation *ThisPointeeLoc = nullptr; 323 324 // Null pointer values, keyed by the canonical pointee type. 325 // 326 // FIXME: The pointer values are indexed by the pointee types which are 327 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 328 // creating a type-independent `NullPointerValue` without a `PointeeLoc` 329 // field. 330 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 331 NullPointerVals; 332 333 AtomicBoolValue &TrueVal; 334 AtomicBoolValue &FalseVal; 335 336 // Indices that are used to avoid recreating the same composite boolean 337 // values. 338 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *> 339 ConjunctionVals; 340 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *> 341 DisjunctionVals; 342 llvm::DenseMap<BoolValue *, NegationValue *> NegationVals; 343 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *> 344 ImplicationVals; 345 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *> 346 BiconditionalVals; 347 348 // Flow conditions are tracked symbolically: each unique flow condition is 349 // associated with a fresh symbolic variable (token), bound to the clause that 350 // defines the flow condition. Conceptually, each binding corresponds to an 351 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 352 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 353 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 354 // the `FlowConditionConstraints` map, keyed by the token of the flow 355 // condition. 356 // 357 // Flow conditions depend on other flow conditions if they are created using 358 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 359 // dependencies is stored in the `FlowConditionDeps` map. 360 llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>> 361 FlowConditionDeps; 362 llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints; 363 }; 364 365 } // namespace dataflow 366 } // namespace clang 367 368 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 369