1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Expr.h" 20 #include "clang/AST/TypeOrdering.h" 21 #include "clang/Analysis/FlowSensitive/Arena.h" 22 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 23 #include "clang/Analysis/FlowSensitive/Solver.h" 24 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 25 #include "clang/Analysis/FlowSensitive/Value.h" 26 #include "llvm/ADT/DenseMap.h" 27 #include "llvm/ADT/DenseSet.h" 28 #include "llvm/ADT/SetVector.h" 29 #include "llvm/Support/Compiler.h" 30 #include <cassert> 31 #include <memory> 32 #include <optional> 33 #include <type_traits> 34 #include <utility> 35 #include <vector> 36 37 namespace clang { 38 namespace dataflow { 39 class Logger; 40 41 /// Skip past nodes that the CFG does not emit. These nodes are invisible to 42 /// flow-sensitive analysis, and should be ignored as they will effectively not 43 /// exist. 44 /// 45 /// * `ParenExpr` - The CFG takes the operator precedence into account, but 46 /// otherwise omits the node afterwards. 47 /// 48 /// * `ExprWithCleanups` - The CFG will generate the appropriate calls to 49 /// destructors and then omit the node. 50 /// 51 const Expr &ignoreCFGOmittedNodes(const Expr &E); 52 const Stmt &ignoreCFGOmittedNodes(const Stmt &S); 53 54 /// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic 55 /// iteration order. 56 using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>; 57 58 /// Returns the set of all fields in the type. 59 FieldSet getObjectFields(QualType Type); 60 61 /// Returns whether `Fields` and `FieldLocs` contain the same fields. 62 bool containsSameFields(const FieldSet &Fields, 63 const RecordStorageLocation::FieldToLoc &FieldLocs); 64 65 struct ContextSensitiveOptions { 66 /// The maximum depth to analyze. A value of zero is equivalent to disabling 67 /// context-sensitive analysis entirely. 68 unsigned Depth = 2; 69 }; 70 71 /// Owns objects that encompass the state of a program and stores context that 72 /// is used during dataflow analysis. 73 class DataflowAnalysisContext { 74 public: 75 struct Options { 76 /// Options for analyzing function bodies when present in the translation 77 /// unit, or empty to disable context-sensitive analysis. Note that this is 78 /// fundamentally limited: some constructs, such as recursion, are 79 /// explicitly unsupported. 80 std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 81 82 /// If provided, analysis details will be recorded here. 83 /// (This is always non-null within an AnalysisContext, the framework 84 /// provides a fallback no-op logger). 85 Logger *Log = nullptr; 86 }; 87 88 /// Constructs a dataflow analysis context. 89 /// 90 /// Requirements: 91 /// 92 /// `S` must not be null. 93 DataflowAnalysisContext(std::unique_ptr<Solver> S, 94 Options Opts = Options{ 95 /*ContextSensitiveOpts=*/std::nullopt, 96 /*Logger=*/nullptr}); 97 ~DataflowAnalysisContext(); 98 99 /// Sets a callback that returns the names and types of the synthetic fields 100 /// to add to a `RecordStorageLocation` of a given type. 101 /// Typically, this is called from the constructor of a `DataflowAnalysis` 102 /// 103 /// To maintain the invariant that all `RecordStorageLocation`s of a given 104 /// type have the same fields: 105 /// * The callback must always return the same result for a given type 106 /// * `setSyntheticFieldCallback()` must be called before any 107 // `RecordStorageLocation`s are created. setSyntheticFieldCallback(std::function<llvm::StringMap<QualType> (QualType)> CB)108 void setSyntheticFieldCallback( 109 std::function<llvm::StringMap<QualType>(QualType)> CB) { 110 assert(!RecordStorageLocationCreated); 111 SyntheticFieldCallback = CB; 112 } 113 114 /// Returns a new storage location appropriate for `Type`. 115 /// 116 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 117 StorageLocation &createStorageLocation(QualType Type); 118 119 /// Creates a `RecordStorageLocation` for the given type and with the given 120 /// fields. 121 /// 122 /// Requirements: 123 /// 124 /// `FieldLocs` must contain exactly the fields returned by 125 /// `getModeledFields(Type)`. 126 /// `SyntheticFields` must contain exactly the fields returned by 127 /// `getSyntheticFields(Type)`. 128 RecordStorageLocation &createRecordStorageLocation( 129 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 130 RecordStorageLocation::SyntheticFieldMap SyntheticFields); 131 132 /// Returns a stable storage location for `D`. 133 StorageLocation &getStableStorageLocation(const ValueDecl &D); 134 135 /// Returns a stable storage location for `E`. 136 StorageLocation &getStableStorageLocation(const Expr &E); 137 138 /// Returns a pointer value that represents a null pointer. Calls with 139 /// `PointeeType` that are canonically equivalent will return the same result. 140 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 141 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 142 143 /// Adds `Constraint` to current and future flow conditions in this context. 144 /// 145 /// Invariants must contain only flow-insensitive information, i.e. facts that 146 /// are true on all paths through the program. 147 /// Information can be added eagerly (when analysis begins), or lazily (e.g. 148 /// when values are first used). The analysis must be careful that the same 149 /// information is added regardless of which order blocks are analyzed in. 150 void addInvariant(const Formula &Constraint); 151 152 /// Adds `Constraint` to the flow condition identified by `Token`. 153 void addFlowConditionConstraint(Atom Token, const Formula &Constraint); 154 155 /// Creates a new flow condition with the same constraints as the flow 156 /// condition identified by `Token` and returns its token. 157 Atom forkFlowCondition(Atom Token); 158 159 /// Creates a new flow condition that represents the disjunction of the flow 160 /// conditions identified by `FirstToken` and `SecondToken`, and returns its 161 /// token. 162 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); 163 164 /// Returns true if the constraints of the flow condition identified by 165 /// `Token` imply that `F` is true. 166 /// Returns false if the flow condition does not imply `F` or if the solver 167 /// times out. 168 bool flowConditionImplies(Atom Token, const Formula &F); 169 170 /// Returns true if the constraints of the flow condition identified by 171 /// `Token` still allow `F` to be true. 172 /// Returns false if the flow condition implies that `F` is false or if the 173 /// solver times out. 174 bool flowConditionAllows(Atom Token, const Formula &F); 175 176 /// Returns true if `Val1` is equivalent to `Val2`. 177 /// Note: This function doesn't take into account constraints on `Val1` and 178 /// `Val2` imposed by the flow condition. 179 bool equivalentFormulas(const Formula &Val1, const Formula &Val2); 180 181 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, 182 llvm::raw_ostream &OS = llvm::dbgs()); 183 184 /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, 185 /// returns null. 186 const ControlFlowContext *getControlFlowContext(const FunctionDecl *F); 187 getOptions()188 const Options &getOptions() { return Opts; } 189 arena()190 Arena &arena() { return *A; } 191 192 /// Returns the outcome of satisfiability checking on `Constraints`. 193 /// 194 /// Flow conditions are not incorporated, so they may need to be manually 195 /// included in `Constraints` to provide contextually-accurate results, e.g. 196 /// if any definitions or relationships of the values in `Constraints` have 197 /// been stored in flow conditions. 198 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); 199 200 /// Returns the fields of `Type`, limited to the set of fields modeled by this 201 /// context. 202 FieldSet getModeledFields(QualType Type); 203 204 /// Returns the names and types of the synthetic fields for the given record 205 /// type. getSyntheticFields(QualType Type)206 llvm::StringMap<QualType> getSyntheticFields(QualType Type) { 207 assert(Type->isRecordType()); 208 if (SyntheticFieldCallback) 209 return SyntheticFieldCallback(Type); 210 return {}; 211 } 212 213 private: 214 friend class Environment; 215 216 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { getEmptyKeyNullableQualTypeDenseMapInfo217 static QualType getEmptyKey() { 218 // Allow a NULL `QualType` by using a different value as the empty key. 219 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 220 } 221 222 using DenseMapInfo::getHashValue; 223 using DenseMapInfo::getTombstoneKey; 224 using DenseMapInfo::isEqual; 225 }; 226 227 // Extends the set of modeled field declarations. 228 void addModeledFields(const FieldSet &Fields); 229 230 /// Adds all constraints of the flow condition identified by `Token` and all 231 /// of its transitive dependencies to `Constraints`. 232 void 233 addTransitiveFlowConditionConstraints(Atom Token, 234 llvm::SetVector<const Formula *> &Out); 235 236 /// Returns true if the solver is able to prove that there is a satisfying 237 /// assignment for `Constraints`. isSatisfiable(llvm::SetVector<const Formula * > Constraints)238 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) { 239 return querySolver(std::move(Constraints)).getStatus() == 240 Solver::Result::Status::Satisfiable; 241 } 242 243 /// Returns true if the solver is able to prove that there is no satisfying 244 /// assignment for `Constraints` isUnsatisfiable(llvm::SetVector<const Formula * > Constraints)245 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { 246 return querySolver(std::move(Constraints)).getStatus() == 247 Solver::Result::Status::Unsatisfiable; 248 } 249 250 std::unique_ptr<Solver> S; 251 std::unique_ptr<Arena> A; 252 253 // Maps from program declarations and statements to storage locations that are 254 // assigned to them. These assignments are global (aggregated across all basic 255 // blocks) and are used to produce stable storage locations when the same 256 // basic blocks are evaluated multiple times. The storage locations that are 257 // in scope for a particular basic block are stored in `Environment`. 258 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 259 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 260 261 // Null pointer values, keyed by the canonical pointee type. 262 // 263 // FIXME: The pointer values are indexed by the pointee types which are 264 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 265 // creating a type-independent `NullPointerValue` without a `PointeeLoc` 266 // field. 267 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 268 NullPointerVals; 269 270 Options Opts; 271 272 // Flow conditions are tracked symbolically: each unique flow condition is 273 // associated with a fresh symbolic variable (token), bound to the clause that 274 // defines the flow condition. Conceptually, each binding corresponds to an 275 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 276 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 277 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 278 // the `FlowConditionConstraints` map, keyed by the token of the flow 279 // condition. 280 // 281 // Flow conditions depend on other flow conditions if they are created using 282 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 283 // dependencies is stored in the `FlowConditionDeps` map. 284 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; 285 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; 286 const Formula *Invariant = nullptr; 287 288 llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts; 289 290 // Fields modeled by environments covered by this context. 291 FieldSet ModeledFields; 292 293 std::unique_ptr<Logger> LogOwner; // If created via flags. 294 295 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; 296 297 /// Has any `RecordStorageLocation` been created yet? 298 bool RecordStorageLocationCreated = false; 299 }; 300 301 } // namespace dataflow 302 } // namespace clang 303 304 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 305