1 //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines type-erased base types and functions for building dataflow
10 //  analyses that run over Control-Flow Graphs (CFGs).
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include <algorithm>
15 #include <memory>
16 #include <system_error>
17 #include <utility>
18 #include <vector>
19 
20 #include "clang/AST/DeclCXX.h"
21 #include "clang/AST/OperationKinds.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
24 #include "clang/Analysis/CFG.h"
25 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
26 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
27 #include "clang/Analysis/FlowSensitive/Transfer.h"
28 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
29 #include "clang/Analysis/FlowSensitive/Value.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/None.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/ErrorHandling.h"
37 
38 namespace clang {
39 namespace dataflow {
40 
41 class StmtToEnvMapImpl : public StmtToEnvMap {
42 public:
43   StmtToEnvMapImpl(
44       const ControlFlowContext &CFCtx,
45       llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
46           BlockToState)
47       : CFCtx(CFCtx), BlockToState(BlockToState) {}
48 
49   const Environment *getEnvironment(const Stmt &S) const override {
50     auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
51     assert(BlockIt != CFCtx.getStmtToBlock().end());
52     const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
53     assert(State);
54     return &State.value().Env;
55   }
56 
57 private:
58   const ControlFlowContext &CFCtx;
59   llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
60 };
61 
62 /// Returns the index of `Block` in the successors of `Pred`.
63 static int blockIndexInPredecessor(const CFGBlock &Pred,
64                                    const CFGBlock &Block) {
65   auto BlockPos = llvm::find_if(
66       Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
67         return Succ && Succ->getBlockID() == Block.getBlockID();
68       });
69   return BlockPos - Pred.succ_begin();
70 }
71 
72 /// Extends the flow condition of an environment based on a terminator
73 /// statement.
74 class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> {
75 public:
76   TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
77                     int BlockSuccIdx, TransferOptions TransferOpts)
78       : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx),
79         TransferOpts(TransferOpts) {}
80 
81   void VisitIfStmt(const IfStmt *S) {
82     auto *Cond = S->getCond();
83     assert(Cond != nullptr);
84     extendFlowCondition(*Cond);
85   }
86 
87   void VisitWhileStmt(const WhileStmt *S) {
88     auto *Cond = S->getCond();
89     assert(Cond != nullptr);
90     extendFlowCondition(*Cond);
91   }
92 
93   void VisitDoStmt(const DoStmt *S) {
94     auto *Cond = S->getCond();
95     assert(Cond != nullptr);
96     extendFlowCondition(*Cond);
97   }
98 
99   void VisitForStmt(const ForStmt *S) {
100     auto *Cond = S->getCond();
101     if (Cond != nullptr)
102       extendFlowCondition(*Cond);
103   }
104 
105   void VisitBinaryOperator(const BinaryOperator *S) {
106     assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
107     auto *LHS = S->getLHS();
108     assert(LHS != nullptr);
109     extendFlowCondition(*LHS);
110   }
111 
112   void VisitConditionalOperator(const ConditionalOperator *S) {
113     auto *Cond = S->getCond();
114     assert(Cond != nullptr);
115     extendFlowCondition(*Cond);
116   }
117 
118 private:
119   void extendFlowCondition(const Expr &Cond) {
120     // The terminator sub-expression might not be evaluated.
121     if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
122       transfer(StmtToEnv, Cond, Env, TransferOpts);
123 
124     // FIXME: The flow condition must be an r-value, so `SkipPast::None` should
125     // suffice.
126     auto *Val =
127         cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
128     // Value merging depends on flow conditions from different environments
129     // being mutually exclusive -- that is, they cannot both be true in their
130     // entirety (even if they may share some clauses). So, we need *some* value
131     // for the condition expression, even if just an atom.
132     if (Val == nullptr) {
133       // FIXME: Consider introducing a helper for this get-or-create pattern.
134       auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
135       if (Loc == nullptr) {
136         Loc = &Env.createStorageLocation(Cond);
137         Env.setStorageLocation(Cond, *Loc);
138       }
139       Val = &Env.makeAtomicBoolValue();
140       Env.setValue(*Loc, *Val);
141     }
142 
143     // The condition must be inverted for the successor that encompasses the
144     // "else" branch, if such exists.
145     if (BlockSuccIdx == 1)
146       Val = &Env.makeNot(*Val);
147 
148     Env.addToFlowCondition(*Val);
149   }
150 
151   const StmtToEnvMap &StmtToEnv;
152   Environment &Env;
153   int BlockSuccIdx;
154   TransferOptions TransferOpts;
155 };
156 
157 /// Computes the input state for a given basic block by joining the output
158 /// states of its predecessors.
159 ///
160 /// Requirements:
161 ///
162 ///   All predecessors of `Block` except those with loop back edges must have
163 ///   already been transferred. States in `BlockStates` that are set to
164 ///   `llvm::None` represent basic blocks that are not evaluated yet.
165 static TypeErasedDataflowAnalysisState computeBlockInputState(
166     const ControlFlowContext &CFCtx,
167     std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
168     const CFGBlock &Block, const Environment &InitEnv,
169     TypeErasedDataflowAnalysis &Analysis) {
170   llvm::DenseSet<const CFGBlock *> Preds;
171   Preds.insert(Block.pred_begin(), Block.pred_end());
172   if (Block.getTerminator().isTemporaryDtorsBranch()) {
173     // This handles a special case where the code that produced the CFG includes
174     // a conditional operator with a branch that constructs a temporary and
175     // calls a destructor annotated as noreturn. The CFG models this as follows:
176     //
177     // B1 (contains the condition of the conditional operator) - succs: B2, B3
178     // B2 (contains code that does not call a noreturn destructor) - succs: B4
179     // B3 (contains code that calls a noreturn destructor) - succs: B4
180     // B4 (has temporary destructor terminator) - succs: B5, B6
181     // B5 (noreturn block that is associated with the noreturn destructor call)
182     // B6 (contains code that follows the conditional operator statement)
183     //
184     // The first successor (B5 above) of a basic block with a temporary
185     // destructor terminator (B4 above) is the block that evaluates the
186     // destructor. If that block has a noreturn element then the predecessor
187     // block that constructed the temporary object (B3 above) is effectively a
188     // noreturn block and its state should not be used as input for the state
189     // of the block that has a temporary destructor terminator (B4 above). This
190     // holds regardless of which branch of the ternary operator calls the
191     // noreturn destructor. However, it doesn't cases where a nested ternary
192     // operator includes a branch that contains a noreturn destructor call.
193     //
194     // See `NoreturnDestructorTest` for concrete examples.
195     if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
196       auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt());
197       assert(StmtBlock != CFCtx.getStmtToBlock().end());
198       Preds.erase(StmtBlock->getSecond());
199     }
200   }
201 
202   llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState;
203   bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer();
204 
205   for (const CFGBlock *Pred : Preds) {
206     // Skip if the `Block` is unreachable or control flow cannot get past it.
207     if (!Pred || Pred->hasNoReturnElement())
208       continue;
209 
210     // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
211     // loop back edge to `Block`.
212     const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
213         BlockStates[Pred->getBlockID()];
214     if (!MaybePredState)
215       continue;
216 
217     TypeErasedDataflowAnalysisState PredState = MaybePredState.value();
218     if (ApplyBuiltinTransfer) {
219       if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
220         const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates);
221         TerminatorVisitor(StmtToEnv, PredState.Env,
222                           blockIndexInPredecessor(*Pred, Block),
223                           Analysis.builtinTransferOptions())
224             .Visit(PredTerminatorStmt);
225       }
226     }
227 
228     if (MaybeState) {
229       Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
230       MaybeState->Env.join(PredState.Env, Analysis);
231     } else {
232       MaybeState = std::move(PredState);
233     }
234   }
235   if (!MaybeState) {
236     // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
237     // to enable building analyses like computation of dominators that
238     // initialize the state of each basic block differently.
239     MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv);
240   }
241   return *MaybeState;
242 }
243 
244 /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
245 /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it
246 /// is evaluated.
247 static void transferCFGStmt(
248     const ControlFlowContext &CFCtx,
249     llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
250     const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
251     TypeErasedDataflowAnalysisState &State,
252     std::function<void(const CFGStmt &,
253                        const TypeErasedDataflowAnalysisState &)>
254         HandleTransferredStmt) {
255   const Stmt *S = CfgStmt.getStmt();
256   assert(S != nullptr);
257 
258   if (Analysis.applyBuiltinTransfer())
259     transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env,
260              Analysis.builtinTransferOptions());
261   Analysis.transferTypeErased(S, State.Lattice, State.Env);
262 
263   if (HandleTransferredStmt != nullptr)
264     HandleTransferredStmt(CfgStmt, State);
265 }
266 
267 /// Transfers `State` by evaluating `CfgInit`.
268 static void transferCFGInitializer(const CFGInitializer &CfgInit,
269                                    TypeErasedDataflowAnalysisState &State) {
270   const auto &ThisLoc = *cast<AggregateStorageLocation>(
271       State.Env.getThisPointeeStorageLocation());
272 
273   const CXXCtorInitializer *Initializer = CfgInit.getInitializer();
274   assert(Initializer != nullptr);
275 
276   const FieldDecl *Member = Initializer->getMember();
277   if (Member == nullptr)
278     // Not a field initializer.
279     return;
280 
281   auto *InitStmt = Initializer->getInit();
282   assert(InitStmt != nullptr);
283 
284   auto *InitStmtLoc =
285       State.Env.getStorageLocation(*InitStmt, SkipPast::Reference);
286   if (InitStmtLoc == nullptr)
287     return;
288 
289   auto *InitStmtVal = State.Env.getValue(*InitStmtLoc);
290   if (InitStmtVal == nullptr)
291     return;
292 
293   if (Member->getType()->isReferenceType()) {
294     auto &MemberLoc = ThisLoc.getChild(*Member);
295     State.Env.setValue(MemberLoc,
296                        State.Env.takeOwnership(
297                            std::make_unique<ReferenceValue>(*InitStmtLoc)));
298   } else {
299     auto &MemberLoc = ThisLoc.getChild(*Member);
300     State.Env.setValue(MemberLoc, *InitStmtVal);
301   }
302 }
303 
304 TypeErasedDataflowAnalysisState transferBlock(
305     const ControlFlowContext &CFCtx,
306     std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
307     const CFGBlock &Block, const Environment &InitEnv,
308     TypeErasedDataflowAnalysis &Analysis,
309     std::function<void(const CFGStmt &,
310                        const TypeErasedDataflowAnalysisState &)>
311         HandleTransferredStmt) {
312   TypeErasedDataflowAnalysisState State =
313       computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis);
314   for (const CFGElement &Element : Block) {
315     switch (Element.getKind()) {
316     case CFGElement::Statement:
317       transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
318                       State, HandleTransferredStmt);
319       break;
320     case CFGElement::Initializer:
321       if (Analysis.applyBuiltinTransfer())
322         transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
323       break;
324     default:
325       // FIXME: Evaluate other kinds of `CFGElement`.
326       break;
327     }
328   }
329   return State;
330 }
331 
332 llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
333 runTypeErasedDataflowAnalysis(
334     const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
335     const Environment &InitEnv,
336     std::function<void(const Stmt *, const TypeErasedDataflowAnalysisState &)>
337         PostVisitStmt) {
338   PostOrderCFGView POV(&CFCtx.getCFG());
339   ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
340 
341   std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
342   BlockStates.resize(CFCtx.getCFG().size(), llvm::None);
343 
344   // The entry basic block doesn't contain statements so it can be skipped.
345   const CFGBlock &Entry = CFCtx.getCFG().getEntry();
346   BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
347                                      InitEnv};
348   Worklist.enqueueSuccessors(&Entry);
349 
350   // Bugs in lattices and transfer functions can prevent the analysis from
351   // converging. To limit the damage (infinite loops) that these bugs can cause,
352   // limit the number of iterations.
353   // FIXME: Consider making the maximum number of iterations configurable.
354   // FIXME: Consider restricting the number of backedges followed, rather than
355   // iterations.
356   // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
357   // of iterations, number of functions that time out, etc.
358   static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
359   static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
360   const uint32_t RelativeMaxIterations =
361       MaxAverageVisitsPerBlock * BlockStates.size();
362   const uint32_t MaxIterations =
363       std::min(RelativeMaxIterations, AbsoluteMaxIterations);
364   uint32_t Iterations = 0;
365   while (const CFGBlock *Block = Worklist.dequeue()) {
366     if (++Iterations > MaxIterations) {
367       return llvm::createStringError(std::errc::timed_out,
368                                      "maximum number of iterations reached");
369     }
370 
371     const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
372         BlockStates[Block->getBlockID()];
373     TypeErasedDataflowAnalysisState NewBlockState =
374         transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis);
375 
376     if (OldBlockState &&
377         Analysis.isEqualTypeErased(OldBlockState.value().Lattice,
378                                    NewBlockState.Lattice) &&
379         OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
380       // The state of `Block` didn't change after transfer so there's no need to
381       // revisit its successors.
382       continue;
383     }
384 
385     BlockStates[Block->getBlockID()] = std::move(NewBlockState);
386 
387     // Do not add unreachable successor blocks to `Worklist`.
388     if (Block->hasNoReturnElement())
389       continue;
390 
391     Worklist.enqueueSuccessors(Block);
392   }
393   // FIXME: Consider evaluating unreachable basic blocks (those that have a
394   // state set to `llvm::None` at this point) to also analyze dead code.
395 
396   if (PostVisitStmt) {
397     for (const CFGBlock *Block : CFCtx.getCFG()) {
398       // Skip blocks that were not evaluated.
399       if (!BlockStates[Block->getBlockID()])
400         continue;
401       transferBlock(
402           CFCtx, BlockStates, *Block, InitEnv, Analysis,
403           [&PostVisitStmt](const clang::CFGStmt &Stmt,
404                            const TypeErasedDataflowAnalysisState &State) {
405             PostVisitStmt(Stmt.getStmt(), State);
406           });
407     }
408   }
409 
410   return BlockStates;
411 }
412 
413 } // namespace dataflow
414 } // namespace clang
415