1 //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines type-erased base types and functions for building dataflow 10 // analyses that run over Control-Flow Graphs (CFGs). 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include <algorithm> 15 #include <memory> 16 #include <system_error> 17 #include <utility> 18 #include <vector> 19 20 #include "clang/AST/DeclCXX.h" 21 #include "clang/AST/OperationKinds.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Analysis/Analyses/PostOrderCFGView.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 26 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 27 #include "clang/Analysis/FlowSensitive/Transfer.h" 28 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 29 #include "clang/Analysis/FlowSensitive/Value.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseSet.h" 32 #include "llvm/ADT/None.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/ErrorHandling.h" 37 38 namespace clang { 39 namespace dataflow { 40 41 class StmtToEnvMapImpl : public StmtToEnvMap { 42 public: 43 StmtToEnvMapImpl( 44 const ControlFlowContext &CFCtx, 45 llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> 46 BlockToState) 47 : CFCtx(CFCtx), BlockToState(BlockToState) {} 48 49 const Environment *getEnvironment(const Stmt &S) const override { 50 auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); 51 assert(BlockIt != CFCtx.getStmtToBlock().end()); 52 const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; 53 assert(State); 54 return &State.value().Env; 55 } 56 57 private: 58 const ControlFlowContext &CFCtx; 59 llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState; 60 }; 61 62 /// Returns the index of `Block` in the successors of `Pred`. 63 static int blockIndexInPredecessor(const CFGBlock &Pred, 64 const CFGBlock &Block) { 65 auto BlockPos = llvm::find_if( 66 Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { 67 return Succ && Succ->getBlockID() == Block.getBlockID(); 68 }); 69 return BlockPos - Pred.succ_begin(); 70 } 71 72 /// Extends the flow condition of an environment based on a terminator 73 /// statement. 74 class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> { 75 public: 76 TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env, 77 int BlockSuccIdx, TransferOptions TransferOpts) 78 : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx), 79 TransferOpts(TransferOpts) {} 80 81 void VisitIfStmt(const IfStmt *S) { 82 auto *Cond = S->getCond(); 83 assert(Cond != nullptr); 84 extendFlowCondition(*Cond); 85 } 86 87 void VisitWhileStmt(const WhileStmt *S) { 88 auto *Cond = S->getCond(); 89 assert(Cond != nullptr); 90 extendFlowCondition(*Cond); 91 } 92 93 void VisitDoStmt(const DoStmt *S) { 94 auto *Cond = S->getCond(); 95 assert(Cond != nullptr); 96 extendFlowCondition(*Cond); 97 } 98 99 void VisitForStmt(const ForStmt *S) { 100 auto *Cond = S->getCond(); 101 if (Cond != nullptr) 102 extendFlowCondition(*Cond); 103 } 104 105 void VisitBinaryOperator(const BinaryOperator *S) { 106 assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); 107 auto *LHS = S->getLHS(); 108 assert(LHS != nullptr); 109 extendFlowCondition(*LHS); 110 } 111 112 void VisitConditionalOperator(const ConditionalOperator *S) { 113 auto *Cond = S->getCond(); 114 assert(Cond != nullptr); 115 extendFlowCondition(*Cond); 116 } 117 118 private: 119 void extendFlowCondition(const Expr &Cond) { 120 // The terminator sub-expression might not be evaluated. 121 if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) 122 transfer(StmtToEnv, Cond, Env, TransferOpts); 123 124 // FIXME: The flow condition must be an r-value, so `SkipPast::None` should 125 // suffice. 126 auto *Val = 127 cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference)); 128 // Value merging depends on flow conditions from different environments 129 // being mutually exclusive -- that is, they cannot both be true in their 130 // entirety (even if they may share some clauses). So, we need *some* value 131 // for the condition expression, even if just an atom. 132 if (Val == nullptr) { 133 // FIXME: Consider introducing a helper for this get-or-create pattern. 134 auto *Loc = Env.getStorageLocation(Cond, SkipPast::None); 135 if (Loc == nullptr) { 136 Loc = &Env.createStorageLocation(Cond); 137 Env.setStorageLocation(Cond, *Loc); 138 } 139 Val = &Env.makeAtomicBoolValue(); 140 Env.setValue(*Loc, *Val); 141 } 142 143 // The condition must be inverted for the successor that encompasses the 144 // "else" branch, if such exists. 145 if (BlockSuccIdx == 1) 146 Val = &Env.makeNot(*Val); 147 148 Env.addToFlowCondition(*Val); 149 } 150 151 const StmtToEnvMap &StmtToEnv; 152 Environment &Env; 153 int BlockSuccIdx; 154 TransferOptions TransferOpts; 155 }; 156 157 /// Computes the input state for a given basic block by joining the output 158 /// states of its predecessors. 159 /// 160 /// Requirements: 161 /// 162 /// All predecessors of `Block` except those with loop back edges must have 163 /// already been transferred. States in `BlockStates` that are set to 164 /// `llvm::None` represent basic blocks that are not evaluated yet. 165 static TypeErasedDataflowAnalysisState computeBlockInputState( 166 const ControlFlowContext &CFCtx, 167 std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, 168 const CFGBlock &Block, const Environment &InitEnv, 169 TypeErasedDataflowAnalysis &Analysis) { 170 llvm::DenseSet<const CFGBlock *> Preds; 171 Preds.insert(Block.pred_begin(), Block.pred_end()); 172 if (Block.getTerminator().isTemporaryDtorsBranch()) { 173 // This handles a special case where the code that produced the CFG includes 174 // a conditional operator with a branch that constructs a temporary and 175 // calls a destructor annotated as noreturn. The CFG models this as follows: 176 // 177 // B1 (contains the condition of the conditional operator) - succs: B2, B3 178 // B2 (contains code that does not call a noreturn destructor) - succs: B4 179 // B3 (contains code that calls a noreturn destructor) - succs: B4 180 // B4 (has temporary destructor terminator) - succs: B5, B6 181 // B5 (noreturn block that is associated with the noreturn destructor call) 182 // B6 (contains code that follows the conditional operator statement) 183 // 184 // The first successor (B5 above) of a basic block with a temporary 185 // destructor terminator (B4 above) is the block that evaluates the 186 // destructor. If that block has a noreturn element then the predecessor 187 // block that constructed the temporary object (B3 above) is effectively a 188 // noreturn block and its state should not be used as input for the state 189 // of the block that has a temporary destructor terminator (B4 above). This 190 // holds regardless of which branch of the ternary operator calls the 191 // noreturn destructor. However, it doesn't cases where a nested ternary 192 // operator includes a branch that contains a noreturn destructor call. 193 // 194 // See `NoreturnDestructorTest` for concrete examples. 195 if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { 196 auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt()); 197 assert(StmtBlock != CFCtx.getStmtToBlock().end()); 198 Preds.erase(StmtBlock->getSecond()); 199 } 200 } 201 202 llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState; 203 bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer(); 204 205 for (const CFGBlock *Pred : Preds) { 206 // Skip if the `Block` is unreachable or control flow cannot get past it. 207 if (!Pred || Pred->hasNoReturnElement()) 208 continue; 209 210 // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a 211 // loop back edge to `Block`. 212 const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState = 213 BlockStates[Pred->getBlockID()]; 214 if (!MaybePredState) 215 continue; 216 217 TypeErasedDataflowAnalysisState PredState = MaybePredState.value(); 218 if (ApplyBuiltinTransfer) { 219 if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) { 220 const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates); 221 TerminatorVisitor(StmtToEnv, PredState.Env, 222 blockIndexInPredecessor(*Pred, Block), 223 Analysis.builtinTransferOptions()) 224 .Visit(PredTerminatorStmt); 225 } 226 } 227 228 if (MaybeState) { 229 Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice); 230 MaybeState->Env.join(PredState.Env, Analysis); 231 } else { 232 MaybeState = std::move(PredState); 233 } 234 } 235 if (!MaybeState) { 236 // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()` 237 // to enable building analyses like computation of dominators that 238 // initialize the state of each basic block differently. 239 MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv); 240 } 241 return *MaybeState; 242 } 243 244 /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. 245 /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it 246 /// is evaluated. 247 static void transferCFGStmt( 248 const ControlFlowContext &CFCtx, 249 llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates, 250 const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, 251 TypeErasedDataflowAnalysisState &State, 252 std::function<void(const CFGStmt &, 253 const TypeErasedDataflowAnalysisState &)> 254 HandleTransferredStmt) { 255 const Stmt *S = CfgStmt.getStmt(); 256 assert(S != nullptr); 257 258 if (Analysis.applyBuiltinTransfer()) 259 transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env, 260 Analysis.builtinTransferOptions()); 261 Analysis.transferTypeErased(S, State.Lattice, State.Env); 262 263 if (HandleTransferredStmt != nullptr) 264 HandleTransferredStmt(CfgStmt, State); 265 } 266 267 /// Transfers `State` by evaluating `CfgInit`. 268 static void transferCFGInitializer(const CFGInitializer &CfgInit, 269 TypeErasedDataflowAnalysisState &State) { 270 const auto &ThisLoc = *cast<AggregateStorageLocation>( 271 State.Env.getThisPointeeStorageLocation()); 272 273 const CXXCtorInitializer *Initializer = CfgInit.getInitializer(); 274 assert(Initializer != nullptr); 275 276 const FieldDecl *Member = Initializer->getMember(); 277 if (Member == nullptr) 278 // Not a field initializer. 279 return; 280 281 auto *InitStmt = Initializer->getInit(); 282 assert(InitStmt != nullptr); 283 284 auto *InitStmtLoc = 285 State.Env.getStorageLocation(*InitStmt, SkipPast::Reference); 286 if (InitStmtLoc == nullptr) 287 return; 288 289 auto *InitStmtVal = State.Env.getValue(*InitStmtLoc); 290 if (InitStmtVal == nullptr) 291 return; 292 293 if (Member->getType()->isReferenceType()) { 294 auto &MemberLoc = ThisLoc.getChild(*Member); 295 State.Env.setValue(MemberLoc, 296 State.Env.takeOwnership( 297 std::make_unique<ReferenceValue>(*InitStmtLoc))); 298 } else { 299 auto &MemberLoc = ThisLoc.getChild(*Member); 300 State.Env.setValue(MemberLoc, *InitStmtVal); 301 } 302 } 303 304 TypeErasedDataflowAnalysisState transferBlock( 305 const ControlFlowContext &CFCtx, 306 std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, 307 const CFGBlock &Block, const Environment &InitEnv, 308 TypeErasedDataflowAnalysis &Analysis, 309 std::function<void(const CFGStmt &, 310 const TypeErasedDataflowAnalysisState &)> 311 HandleTransferredStmt) { 312 TypeErasedDataflowAnalysisState State = 313 computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis); 314 for (const CFGElement &Element : Block) { 315 switch (Element.getKind()) { 316 case CFGElement::Statement: 317 transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis, 318 State, HandleTransferredStmt); 319 break; 320 case CFGElement::Initializer: 321 if (Analysis.applyBuiltinTransfer()) 322 transferCFGInitializer(*Element.getAs<CFGInitializer>(), State); 323 break; 324 default: 325 // FIXME: Evaluate other kinds of `CFGElement`. 326 break; 327 } 328 } 329 return State; 330 } 331 332 llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>> 333 runTypeErasedDataflowAnalysis( 334 const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis, 335 const Environment &InitEnv, 336 std::function<void(const Stmt *, const TypeErasedDataflowAnalysisState &)> 337 PostVisitStmt) { 338 PostOrderCFGView POV(&CFCtx.getCFG()); 339 ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); 340 341 std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates; 342 BlockStates.resize(CFCtx.getCFG().size(), llvm::None); 343 344 // The entry basic block doesn't contain statements so it can be skipped. 345 const CFGBlock &Entry = CFCtx.getCFG().getEntry(); 346 BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), 347 InitEnv}; 348 Worklist.enqueueSuccessors(&Entry); 349 350 // Bugs in lattices and transfer functions can prevent the analysis from 351 // converging. To limit the damage (infinite loops) that these bugs can cause, 352 // limit the number of iterations. 353 // FIXME: Consider making the maximum number of iterations configurable. 354 // FIXME: Consider restricting the number of backedges followed, rather than 355 // iterations. 356 // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number 357 // of iterations, number of functions that time out, etc. 358 static constexpr uint32_t MaxAverageVisitsPerBlock = 4; 359 static constexpr uint32_t AbsoluteMaxIterations = 1 << 16; 360 const uint32_t RelativeMaxIterations = 361 MaxAverageVisitsPerBlock * BlockStates.size(); 362 const uint32_t MaxIterations = 363 std::min(RelativeMaxIterations, AbsoluteMaxIterations); 364 uint32_t Iterations = 0; 365 while (const CFGBlock *Block = Worklist.dequeue()) { 366 if (++Iterations > MaxIterations) { 367 return llvm::createStringError(std::errc::timed_out, 368 "maximum number of iterations reached"); 369 } 370 371 const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState = 372 BlockStates[Block->getBlockID()]; 373 TypeErasedDataflowAnalysisState NewBlockState = 374 transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis); 375 376 if (OldBlockState && 377 Analysis.isEqualTypeErased(OldBlockState.value().Lattice, 378 NewBlockState.Lattice) && 379 OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { 380 // The state of `Block` didn't change after transfer so there's no need to 381 // revisit its successors. 382 continue; 383 } 384 385 BlockStates[Block->getBlockID()] = std::move(NewBlockState); 386 387 // Do not add unreachable successor blocks to `Worklist`. 388 if (Block->hasNoReturnElement()) 389 continue; 390 391 Worklist.enqueueSuccessors(Block); 392 } 393 // FIXME: Consider evaluating unreachable basic blocks (those that have a 394 // state set to `llvm::None` at this point) to also analyze dead code. 395 396 if (PostVisitStmt) { 397 for (const CFGBlock *Block : CFCtx.getCFG()) { 398 // Skip blocks that were not evaluated. 399 if (!BlockStates[Block->getBlockID()]) 400 continue; 401 transferBlock( 402 CFCtx, BlockStates, *Block, InitEnv, Analysis, 403 [&PostVisitStmt](const clang::CFGStmt &Stmt, 404 const TypeErasedDataflowAnalysisState &State) { 405 PostVisitStmt(Stmt.getStmt(), State); 406 }); 407 } 408 } 409 410 return BlockStates; 411 } 412 413 } // namespace dataflow 414 } // namespace clang 415