1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Expr.h" 20 #include "clang/AST/TypeOrdering.h" 21 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 22 #include "clang/Analysis/FlowSensitive/Solver.h" 23 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 24 #include "clang/Analysis/FlowSensitive/Value.h" 25 #include "llvm/ADT/DenseMap.h" 26 #include "llvm/ADT/DenseSet.h" 27 #include "llvm/Support/Compiler.h" 28 #include <cassert> 29 #include <memory> 30 #include <optional> 31 #include <type_traits> 32 #include <utility> 33 #include <vector> 34 35 namespace clang { 36 namespace dataflow { 37 38 /// Skip past nodes that the CFG does not emit. These nodes are invisible to 39 /// flow-sensitive analysis, and should be ignored as they will effectively not 40 /// exist. 41 /// 42 /// * `ParenExpr` - The CFG takes the operator precedence into account, but 43 /// otherwise omits the node afterwards. 44 /// 45 /// * `ExprWithCleanups` - The CFG will generate the appropriate calls to 46 /// destructors and then omit the node. 47 /// 48 const Expr &ignoreCFGOmittedNodes(const Expr &E); 49 const Stmt &ignoreCFGOmittedNodes(const Stmt &S); 50 51 /// Returns the set of all fields in the type. 52 llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type); 53 54 struct ContextSensitiveOptions { 55 /// The maximum depth to analyze. A value of zero is equivalent to disabling 56 /// context-sensitive analysis entirely. 57 unsigned Depth = 2; 58 }; 59 60 /// Owns objects that encompass the state of a program and stores context that 61 /// is used during dataflow analysis. 62 class DataflowAnalysisContext { 63 public: 64 struct Options { 65 /// Options for analyzing function bodies when present in the translation 66 /// unit, or empty to disable context-sensitive analysis. Note that this is 67 /// fundamentally limited: some constructs, such as recursion, are 68 /// explicitly unsupported. 69 std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 70 }; 71 72 /// Constructs a dataflow analysis context. 73 /// 74 /// Requirements: 75 /// 76 /// `S` must not be null. 77 DataflowAnalysisContext(std::unique_ptr<Solver> S, 78 Options Opts = Options{ 79 /*ContextSensitiveOpts=*/std::nullopt}) S(std::move (S))80 : S(std::move(S)), TrueVal(createAtomicBoolValue()), 81 FalseVal(createAtomicBoolValue()), Opts(Opts) { 82 assert(this->S != nullptr); 83 } 84 85 /// Takes ownership of `Loc` and returns a reference to it. 86 /// 87 /// Requirements: 88 /// 89 /// `Loc` must not be null. 90 template <typename T> 91 std::enable_if_t<std::is_base_of<StorageLocation, T>::value, T &> takeOwnership(std::unique_ptr<T> Loc)92 takeOwnership(std::unique_ptr<T> Loc) { 93 assert(Loc != nullptr); 94 Locs.push_back(std::move(Loc)); 95 return *cast<T>(Locs.back().get()); 96 } 97 98 /// Takes ownership of `Val` and returns a reference to it. 99 /// 100 /// Requirements: 101 /// 102 /// `Val` must not be null. 103 template <typename T> 104 std::enable_if_t<std::is_base_of<Value, T>::value, T &> takeOwnership(std::unique_ptr<T> Val)105 takeOwnership(std::unique_ptr<T> Val) { 106 assert(Val != nullptr); 107 Vals.push_back(std::move(Val)); 108 return *cast<T>(Vals.back().get()); 109 } 110 111 /// Returns a new storage location appropriate for `Type`. 112 /// 113 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 114 StorageLocation &createStorageLocation(QualType Type); 115 116 /// Returns a stable storage location for `D`. 117 StorageLocation &getStableStorageLocation(const VarDecl &D); 118 119 /// Returns a stable storage location for `E`. 120 StorageLocation &getStableStorageLocation(const Expr &E); 121 122 /// Assigns `Loc` as the storage location of `D`. 123 /// 124 /// Requirements: 125 /// 126 /// `D` must not be assigned a storage location. setStorageLocation(const ValueDecl & D,StorageLocation & Loc)127 void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 128 assert(DeclToLoc.find(&D) == DeclToLoc.end()); 129 DeclToLoc[&D] = &Loc; 130 } 131 132 /// Returns the storage location assigned to `D` or null if `D` has no 133 /// assigned storage location. getStorageLocation(const ValueDecl & D)134 StorageLocation *getStorageLocation(const ValueDecl &D) const { 135 auto It = DeclToLoc.find(&D); 136 return It == DeclToLoc.end() ? nullptr : It->second; 137 } 138 139 /// Assigns `Loc` as the storage location of `E`. 140 /// 141 /// Requirements: 142 /// 143 /// `E` must not be assigned a storage location. setStorageLocation(const Expr & E,StorageLocation & Loc)144 void setStorageLocation(const Expr &E, StorageLocation &Loc) { 145 const Expr &CanonE = ignoreCFGOmittedNodes(E); 146 assert(ExprToLoc.find(&CanonE) == ExprToLoc.end()); 147 ExprToLoc[&CanonE] = &Loc; 148 } 149 150 /// Returns the storage location assigned to `E` or null if `E` has no 151 /// assigned storage location. getStorageLocation(const Expr & E)152 StorageLocation *getStorageLocation(const Expr &E) const { 153 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 154 return It == ExprToLoc.end() ? nullptr : It->second; 155 } 156 157 /// Returns a pointer value that represents a null pointer. Calls with 158 /// `PointeeType` that are canonically equivalent will return the same result. 159 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 160 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 161 162 /// Returns a symbolic boolean value that models a boolean literal equal to 163 /// `Value`. getBoolLiteralValue(bool Value)164 AtomicBoolValue &getBoolLiteralValue(bool Value) const { 165 return Value ? TrueVal : FalseVal; 166 } 167 168 /// Creates an atomic boolean value. createAtomicBoolValue()169 AtomicBoolValue &createAtomicBoolValue() { 170 return takeOwnership(std::make_unique<AtomicBoolValue>()); 171 } 172 173 /// Creates a Top value for booleans. Each instance is unique and can be 174 /// assigned a distinct truth value during solving. 175 /// 176 /// FIXME: `Top iff Top` is true when both Tops are identical (by pointer 177 /// equality), but not when they are distinct values. We should improve the 178 /// implementation so that `Top iff Top` has a consistent meaning, regardless 179 /// of the identity of `Top`. Moreover, I think the meaning should be 180 /// `false`. createTopBoolValue()181 TopBoolValue &createTopBoolValue() { 182 return takeOwnership(std::make_unique<TopBoolValue>()); 183 } 184 185 /// Returns a boolean value that represents the conjunction of `LHS` and 186 /// `RHS`. Subsequent calls with the same arguments, regardless of their 187 /// order, will return the same result. If the given boolean values represent 188 /// the same value, the result will be the value itself. 189 BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS); 190 191 /// Returns a boolean value that represents the disjunction of `LHS` and 192 /// `RHS`. Subsequent calls with the same arguments, regardless of their 193 /// order, will return the same result. If the given boolean values represent 194 /// the same value, the result will be the value itself. 195 BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS); 196 197 /// Returns a boolean value that represents the negation of `Val`. Subsequent 198 /// calls with the same argument will return the same result. 199 BoolValue &getOrCreateNegation(BoolValue &Val); 200 201 /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls 202 /// with the same arguments, will return the same result. If the given boolean 203 /// values represent the same value, the result will be a value that 204 /// represents the true boolean literal. 205 BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS); 206 207 /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls 208 /// with the same arguments, regardless of their order, will return the same 209 /// result. If the given boolean values represent the same value, the result 210 /// will be a value that represents the true boolean literal. 211 BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS); 212 213 /// Creates a fresh flow condition and returns a token that identifies it. The 214 /// token can be used to perform various operations on the flow condition such 215 /// as adding constraints to it, forking it, joining it with another flow 216 /// condition, or checking implications. 217 AtomicBoolValue &makeFlowConditionToken(); 218 219 /// Adds `Constraint` to the flow condition identified by `Token`. 220 void addFlowConditionConstraint(AtomicBoolValue &Token, 221 BoolValue &Constraint); 222 223 /// Creates a new flow condition with the same constraints as the flow 224 /// condition identified by `Token` and returns its token. 225 AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token); 226 227 /// Creates a new flow condition that represents the disjunction of the flow 228 /// conditions identified by `FirstToken` and `SecondToken`, and returns its 229 /// token. 230 AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken, 231 AtomicBoolValue &SecondToken); 232 233 // FIXME: This function returns the flow condition expressed directly as its 234 // constraints: (C1 AND C2 AND ...). This differs from the general approach in 235 // the framework where a flow condition is represented as a token (an atomic 236 // boolean) with dependencies and constraints tracked in `FlowConditionDeps` 237 // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...). 238 // Consider if we should make the representation of flow condition consistent, 239 // returning an atomic boolean token with separate constraints instead. 240 // 241 /// Builds and returns the logical formula defining the flow condition 242 /// identified by `Token`. If a value in the formula is present as a key in 243 /// `Substitutions`, it will be substituted with the value it maps to. 244 /// As an example, say we have flow condition tokens FC1, FC2, FC3 and 245 /// FlowConditionConstraints: { FC1: C1, 246 /// FC2: C2, 247 /// FC3: (FC1 v FC2) ^ C3 } 248 /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value 249 /// corresponding to (C1' v C2) ^ C3. 250 BoolValue &buildAndSubstituteFlowCondition( 251 AtomicBoolValue &Token, 252 llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions); 253 254 /// Returns true if and only if the constraints of the flow condition 255 /// identified by `Token` imply that `Val` is true. 256 bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val); 257 258 /// Returns true if and only if the constraints of the flow condition 259 /// identified by `Token` are always true. 260 bool flowConditionIsTautology(AtomicBoolValue &Token); 261 262 /// Returns true if `Val1` is equivalent to `Val2`. 263 /// Note: This function doesn't take into account constraints on `Val1` and 264 /// `Val2` imposed by the flow condition. 265 bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2); 266 267 LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token); 268 269 /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, 270 /// returns null. 271 const ControlFlowContext *getControlFlowContext(const FunctionDecl *F); 272 getOptions()273 const Options &getOptions() { return Opts; } 274 275 private: 276 friend class Environment; 277 278 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { getEmptyKeyNullableQualTypeDenseMapInfo279 static QualType getEmptyKey() { 280 // Allow a NULL `QualType` by using a different value as the empty key. 281 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 282 } 283 284 using DenseMapInfo::getHashValue; 285 using DenseMapInfo::getTombstoneKey; 286 using DenseMapInfo::isEqual; 287 }; 288 289 // Extends the set of modeled field declarations. 290 void addModeledFields(const llvm::DenseSet<const FieldDecl *> &Fields); 291 292 /// Returns the fields of `Type`, limited to the set of fields modeled by this 293 /// context. 294 llvm::DenseSet<const FieldDecl *> getReferencedFields(QualType Type); 295 296 /// Adds all constraints of the flow condition identified by `Token` and all 297 /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used 298 /// to track tokens of flow conditions that were already visited by recursive 299 /// calls. 300 void addTransitiveFlowConditionConstraints( 301 AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, 302 llvm::DenseSet<AtomicBoolValue *> &VisitedTokens); 303 304 /// Returns the outcome of satisfiability checking on `Constraints`. 305 /// Possible outcomes are: 306 /// - `Satisfiable`: A satisfying assignment exists and is returned. 307 /// - `Unsatisfiable`: A satisfying assignment does not exist. 308 /// - `TimedOut`: The search for a satisfying assignment was not completed. 309 Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints); 310 311 /// Returns true if the solver is able to prove that there is no satisfying 312 /// assignment for `Constraints` isUnsatisfiable(llvm::DenseSet<BoolValue * > Constraints)313 bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) { 314 return querySolver(std::move(Constraints)).getStatus() == 315 Solver::Result::Status::Unsatisfiable; 316 } 317 318 /// Returns a boolean value as a result of substituting `Val` and its sub 319 /// values based on entries in `SubstitutionsCache`. Intermediate results are 320 /// stored in `SubstitutionsCache` to avoid reprocessing values that have 321 /// already been visited. 322 BoolValue &substituteBoolValue( 323 BoolValue &Val, 324 llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); 325 326 /// Builds and returns the logical formula defining the flow condition 327 /// identified by `Token`, sub values may be substituted based on entries in 328 /// `SubstitutionsCache`. Intermediate results are stored in 329 /// `SubstitutionsCache` to avoid reprocessing values that have already been 330 /// visited. 331 BoolValue &buildAndSubstituteFlowConditionWithCache( 332 AtomicBoolValue &Token, 333 llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); 334 335 std::unique_ptr<Solver> S; 336 337 // Storage for the state of a program. 338 std::vector<std::unique_ptr<StorageLocation>> Locs; 339 std::vector<std::unique_ptr<Value>> Vals; 340 341 // Maps from program declarations and statements to storage locations that are 342 // assigned to them. These assignments are global (aggregated across all basic 343 // blocks) and are used to produce stable storage locations when the same 344 // basic blocks are evaluated multiple times. The storage locations that are 345 // in scope for a particular basic block are stored in `Environment`. 346 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 347 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 348 349 // Null pointer values, keyed by the canonical pointee type. 350 // 351 // FIXME: The pointer values are indexed by the pointee types which are 352 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 353 // creating a type-independent `NullPointerValue` without a `PointeeLoc` 354 // field. 355 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 356 NullPointerVals; 357 358 AtomicBoolValue &TrueVal; 359 AtomicBoolValue &FalseVal; 360 361 Options Opts; 362 363 // Indices that are used to avoid recreating the same composite boolean 364 // values. 365 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *> 366 ConjunctionVals; 367 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *> 368 DisjunctionVals; 369 llvm::DenseMap<BoolValue *, NegationValue *> NegationVals; 370 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *> 371 ImplicationVals; 372 llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *> 373 BiconditionalVals; 374 375 // Flow conditions are tracked symbolically: each unique flow condition is 376 // associated with a fresh symbolic variable (token), bound to the clause that 377 // defines the flow condition. Conceptually, each binding corresponds to an 378 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 379 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 380 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 381 // the `FlowConditionConstraints` map, keyed by the token of the flow 382 // condition. 383 // 384 // Flow conditions depend on other flow conditions if they are created using 385 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 386 // dependencies is stored in the `FlowConditionDeps` map. 387 llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>> 388 FlowConditionDeps; 389 llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints; 390 391 llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts; 392 393 // Fields modeled by environments covered by this context. 394 llvm::DenseSet<const FieldDecl *> ModeledFields; 395 }; 396 397 } // namespace dataflow 398 } // namespace clang 399 400 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 401