1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Analysis/Analyses/CalledOnceCheck.h"
10 #include "clang/AST/Attr.h"
11 #include "clang/AST/Decl.h"
12 #include "clang/AST/DeclBase.h"
13 #include "clang/AST/Expr.h"
14 #include "clang/AST/ExprObjC.h"
15 #include "clang/AST/OperationKinds.h"
16 #include "clang/AST/ParentMap.h"
17 #include "clang/AST/RecursiveASTVisitor.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/AST/StmtObjC.h"
20 #include "clang/AST/StmtVisitor.h"
21 #include "clang/AST/Type.h"
22 #include "clang/Analysis/AnalysisDeclContext.h"
23 #include "clang/Analysis/CFG.h"
24 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
25 #include "clang/Basic/IdentifierTable.h"
26 #include "clang/Basic/LLVM.h"
27 #include "llvm/ADT/BitVector.h"
28 #include "llvm/ADT/BitmaskEnum.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/PointerIntPair.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/Sequence.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include <memory>
39
40 using namespace clang;
41
42 namespace {
43 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2;
44 template <class T>
45 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>;
46 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8;
47 template <class T>
48 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>;
49 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = {
50 "completionHandler", "completion", "withCompletionHandler"};
51 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = {
52 "WithCompletionHandler", "WithCompletion"};
53 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = {
54 "error", "cancel", "shouldCall", "done", "OK", "success"};
55
56 class ParameterStatus {
57 public:
58 // Status kind is basically the main part of parameter's status.
59 // The kind represents our knowledge (so far) about a tracked parameter
60 // in the context of this analysis.
61 //
62 // Since we want to report on missing and extraneous calls, we need to
63 // track the fact whether paramater was called or not. This automatically
64 // decides two kinds: `NotCalled` and `Called`.
65 //
66 // One of the erroneous situations is the case when parameter is called only
67 // on some of the paths. We could've considered it `NotCalled`, but we want
68 // to report double call warnings even if these two calls are not guaranteed
69 // to happen in every execution. We also don't want to have it as `Called`
70 // because not calling tracked parameter on all of the paths is an error
71 // on its own. For these reasons, we need to have a separate kind,
72 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid
73 // confusion.
74 //
75 // Two violations of calling parameter more than once and not calling it on
76 // every path are not, however, mutually exclusive. In situations where both
77 // violations take place, we prefer to report ONLY double call. It's always
78 // harder to pinpoint a bug that has arisen when a user neglects to take the
79 // right action (and therefore, no action is taken), than when a user takes
80 // the wrong action. And, in order to remember that we already reported
81 // a double call, we need another kind: `Reported`.
82 //
83 // Our analysis is intra-procedural and, while in the perfect world,
84 // developers only use tracked parameters to call them, in the real world,
85 // the picture might be different. Parameters can be stored in global
86 // variables or leaked into other functions that we know nothing about.
87 // We try to be lenient and trust users. Another kind `Escaped` reflects
88 // such situations. We don't know if it gets called there or not, but we
89 // should always think of `Escaped` as the best possible option.
90 //
91 // Some of the paths in the analyzed functions might end with a call
92 // to noreturn functions. Such paths are not required to have parameter
93 // calls and we want to track that. For the purposes of better diagnostics,
94 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`.
95 //
96 // Additionally, we have `NotVisited` kind that tells us nothing about
97 // a tracked parameter, but is used for tracking analyzed (aka visited)
98 // basic blocks.
99 //
100 // If we consider `|` to be a JOIN operation of two kinds coming from
101 // two different paths, the following properties must hold:
102 //
103 // 1. for any Kind K: K | K == K
104 // Joining two identical kinds should result in the same kind.
105 //
106 // 2. for any Kind K: Reported | K == Reported
107 // Doesn't matter on which path it was reported, it still is.
108 //
109 // 3. for any Kind K: NoReturn | K == K
110 // We can totally ignore noreturn paths during merges.
111 //
112 // 4. DefinitelyCalled | NotCalled == MaybeCalled
113 // Called on one path, not called on another - that's simply
114 // a definition for MaybeCalled.
115 //
116 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]:
117 // Escaped | K == K
118 // Escaped mirrors other statuses after joins.
119 // Every situation, when we join any of the listed kinds K,
120 // is a violation. For this reason, in order to assume the
121 // best outcome for this escape, we consider it to be the
122 // same as the other path.
123 //
124 // 6. for any Kind K in [DefinitelyCalled, NotCalled]:
125 // MaybeCalled | K == MaybeCalled
126 // MaybeCalled should basically stay after almost every join.
127 enum Kind {
128 // No-return paths should be absolutely transparent for the analysis.
129 // 0x0 is the identity element for selected join operation (binary or).
130 NoReturn = 0x0, /* 0000 */
131 // Escaped marks situations when marked parameter escaped into
132 // another function (so we can assume that it was possibly called there).
133 Escaped = 0x1, /* 0001 */
134 // Parameter was definitely called once at this point.
135 DefinitelyCalled = 0x3, /* 0011 */
136 // Kinds less or equal to NON_ERROR_STATUS are not considered errors.
137 NON_ERROR_STATUS = DefinitelyCalled,
138 // Parameter was not yet called.
139 NotCalled = 0x5, /* 0101 */
140 // Parameter was not called at least on one path leading to this point,
141 // while there is also at least one path that it gets called.
142 MaybeCalled = 0x7, /* 0111 */
143 // Parameter was not yet analyzed.
144 NotVisited = 0x8, /* 1000 */
145 // We already reported a violation and stopped tracking calls for this
146 // parameter.
147 Reported = 0x15, /* 1111 */
148 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
149 };
150
151 constexpr ParameterStatus() = default;
ParameterStatus(Kind K)152 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) {
153 assert(!seenAnyCalls(K) && "Can't initialize status without a call");
154 }
ParameterStatus(Kind K,const Expr * Call)155 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) {
156 assert(seenAnyCalls(K) && "This kind is not supposed to have a call");
157 }
158
getCall() const159 const Expr &getCall() const {
160 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call");
161 return *Call;
162 }
seenAnyCalls(Kind K)163 static bool seenAnyCalls(Kind K) {
164 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported;
165 }
seenAnyCalls() const166 bool seenAnyCalls() const { return seenAnyCalls(getKind()); }
167
isErrorStatus(Kind K)168 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; }
isErrorStatus() const169 bool isErrorStatus() const { return isErrorStatus(getKind()); }
170
getKind() const171 Kind getKind() const { return StatusKind; }
172
join(const ParameterStatus & Other)173 void join(const ParameterStatus &Other) {
174 // If we have a pointer already, let's keep it.
175 // For the purposes of the analysis, it doesn't really matter
176 // which call we report.
177 //
178 // If we don't have a pointer, let's take whatever gets joined.
179 if (!Call) {
180 Call = Other.Call;
181 }
182 // Join kinds.
183 StatusKind |= Other.getKind();
184 }
185
operator ==(const ParameterStatus & Other) const186 bool operator==(const ParameterStatus &Other) const {
187 // We compare only kinds, pointers on their own is only additional
188 // information.
189 return getKind() == Other.getKind();
190 }
191
192 private:
193 // It would've been a perfect place to use llvm::PointerIntPair, but
194 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2.
195 Kind StatusKind = NotVisited;
196 const Expr *Call = nullptr;
197 };
198
199 /// State aggregates statuses of all tracked parameters.
200 class State {
201 public:
State(unsigned Size,ParameterStatus::Kind K=ParameterStatus::NotVisited)202 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited)
203 : ParamData(Size, K) {}
204
205 /// Return status of a parameter with the given index.
206 /// \{
getStatusFor(unsigned Index)207 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; }
getStatusFor(unsigned Index) const208 const ParameterStatus &getStatusFor(unsigned Index) const {
209 return ParamData[Index];
210 }
211 /// \}
212
213 /// Return true if parameter with the given index can be called.
seenAnyCalls(unsigned Index) const214 bool seenAnyCalls(unsigned Index) const {
215 return getStatusFor(Index).seenAnyCalls();
216 }
217 /// Return a reference that we consider a call.
218 ///
219 /// Should only be used for parameters that can be called.
getCallFor(unsigned Index) const220 const Expr &getCallFor(unsigned Index) const {
221 return getStatusFor(Index).getCall();
222 }
223 /// Return status kind of parameter with the given index.
getKindFor(unsigned Index) const224 ParameterStatus::Kind getKindFor(unsigned Index) const {
225 return getStatusFor(Index).getKind();
226 }
227
isVisited() const228 bool isVisited() const {
229 return llvm::all_of(ParamData, [](const ParameterStatus &S) {
230 return S.getKind() != ParameterStatus::NotVisited;
231 });
232 }
233
234 // Join other state into the current state.
join(const State & Other)235 void join(const State &Other) {
236 assert(ParamData.size() == Other.ParamData.size() &&
237 "Couldn't join statuses with different sizes");
238 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) {
239 std::get<0>(Pair).join(std::get<1>(Pair));
240 }
241 }
242
243 using iterator = ParamSizedVector<ParameterStatus>::iterator;
244 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator;
245
begin()246 iterator begin() { return ParamData.begin(); }
end()247 iterator end() { return ParamData.end(); }
248
begin() const249 const_iterator begin() const { return ParamData.begin(); }
end() const250 const_iterator end() const { return ParamData.end(); }
251
operator ==(const State & Other) const252 bool operator==(const State &Other) const {
253 return ParamData == Other.ParamData;
254 }
255
256 private:
257 ParamSizedVector<ParameterStatus> ParamData;
258 };
259
260 /// A simple class that finds DeclRefExpr in the given expression.
261 ///
262 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever
263 /// expressions on our way. Only certain expressions considered "no-op"
264 /// for our task are indeed skipped.
265 class DeclRefFinder
266 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> {
267 public:
268 /// Find a DeclRefExpr in the given expression.
269 ///
270 /// In its most basic form (ShouldRetrieveFromComparisons == false),
271 /// this function can be simply reduced to the following question:
272 ///
273 /// - If expression E is used as a function argument, could we say
274 /// that DeclRefExpr nested in E is used as an argument?
275 ///
276 /// According to this rule, we can say that parens, casts and dereferencing
277 /// (dereferencing only applied to function pointers, but this is our case)
278 /// can be skipped.
279 ///
280 /// When we should look into comparisons the question changes to:
281 ///
282 /// - If expression E is used as a condition, could we say that
283 /// DeclRefExpr is being checked?
284 ///
285 /// And even though, these are two different questions, they have quite a lot
286 /// in common. Actually, we can say that whatever expression answers
287 /// positively the first question also fits the second question as well.
288 ///
289 /// In addition, we skip binary operators == and !=, and unary opeartor !.
find(const Expr * E,bool ShouldRetrieveFromComparisons=false)290 static const DeclRefExpr *find(const Expr *E,
291 bool ShouldRetrieveFromComparisons = false) {
292 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E);
293 }
294
VisitDeclRefExpr(const DeclRefExpr * DR)295 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; }
296
VisitUnaryOperator(const UnaryOperator * UO)297 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) {
298 switch (UO->getOpcode()) {
299 case UO_LNot:
300 // We care about logical not only if we care about comparisons.
301 if (!ShouldRetrieveFromComparisons)
302 return nullptr;
303 LLVM_FALLTHROUGH;
304 // Function pointer/references can be dereferenced before a call.
305 // That doesn't make it, however, any different from a regular call.
306 // For this reason, dereference operation is a "no-op".
307 case UO_Deref:
308 return Visit(UO->getSubExpr());
309 default:
310 return nullptr;
311 }
312 }
313
VisitBinaryOperator(const BinaryOperator * BO)314 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) {
315 if (!ShouldRetrieveFromComparisons)
316 return nullptr;
317
318 switch (BO->getOpcode()) {
319 case BO_EQ:
320 case BO_NE: {
321 const DeclRefExpr *LHS = Visit(BO->getLHS());
322 return LHS ? LHS : Visit(BO->getRHS());
323 }
324 default:
325 return nullptr;
326 }
327 }
328
VisitOpaqueValueExpr(const OpaqueValueExpr * OVE)329 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) {
330 return Visit(OVE->getSourceExpr());
331 }
332
VisitExpr(const Expr * E)333 const DeclRefExpr *VisitExpr(const Expr *E) {
334 // It is a fallback method that gets called whenever the actual type
335 // of the given expression is not covered.
336 //
337 // We first check if we have anything to skip. And then repeat the whole
338 // procedure for a nested expression instead.
339 const Expr *DeclutteredExpr = E->IgnoreParenCasts();
340 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr;
341 }
342
343 private:
DeclRefFinder(bool ShouldRetrieveFromComparisons)344 DeclRefFinder(bool ShouldRetrieveFromComparisons)
345 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {}
346
347 bool ShouldRetrieveFromComparisons;
348 };
349
findDeclRefExpr(const Expr * In,bool ShouldRetrieveFromComparisons=false)350 const DeclRefExpr *findDeclRefExpr(const Expr *In,
351 bool ShouldRetrieveFromComparisons = false) {
352 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons);
353 }
354
355 const ParmVarDecl *
findReferencedParmVarDecl(const Expr * In,bool ShouldRetrieveFromComparisons=false)356 findReferencedParmVarDecl(const Expr *In,
357 bool ShouldRetrieveFromComparisons = false) {
358 if (const DeclRefExpr *DR =
359 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) {
360 return dyn_cast<ParmVarDecl>(DR->getDecl());
361 }
362
363 return nullptr;
364 }
365
366 /// Return conditions expression of a statement if it has one.
getCondition(const Stmt * S)367 const Expr *getCondition(const Stmt *S) {
368 if (!S) {
369 return nullptr;
370 }
371
372 if (const auto *If = dyn_cast<IfStmt>(S)) {
373 return If->getCond();
374 }
375 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) {
376 return Ternary->getCond();
377 }
378
379 return nullptr;
380 }
381
382 /// A small helper class that collects all named identifiers in the given
383 /// expression. It traverses it recursively, so names from deeper levels
384 /// of the AST will end up in the results.
385 /// Results might have duplicate names, if this is a problem, convert to
386 /// string sets afterwards.
387 class NamesCollector : public RecursiveASTVisitor<NamesCollector> {
388 public:
389 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5;
390 using NameCollection =
391 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>;
392
collect(const Expr * From)393 static NameCollection collect(const Expr *From) {
394 NamesCollector Impl;
395 Impl.TraverseStmt(const_cast<Expr *>(From));
396 return Impl.Result;
397 }
398
VisitDeclRefExpr(const DeclRefExpr * E)399 bool VisitDeclRefExpr(const DeclRefExpr *E) {
400 Result.push_back(E->getDecl()->getName());
401 return true;
402 }
403
VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr * E)404 bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) {
405 llvm::StringRef Name;
406
407 if (E->isImplicitProperty()) {
408 ObjCMethodDecl *PropertyMethodDecl = nullptr;
409 if (E->isMessagingGetter()) {
410 PropertyMethodDecl = E->getImplicitPropertyGetter();
411 } else {
412 PropertyMethodDecl = E->getImplicitPropertySetter();
413 }
414 assert(PropertyMethodDecl &&
415 "Implicit property must have associated declaration");
416 Name = PropertyMethodDecl->getSelector().getNameForSlot(0);
417 } else {
418 assert(E->isExplicitProperty());
419 Name = E->getExplicitProperty()->getName();
420 }
421
422 Result.push_back(Name);
423 return true;
424 }
425
426 private:
427 NamesCollector() = default;
428 NameCollection Result;
429 };
430
431 /// Check whether the given expression mentions any of conventional names.
mentionsAnyOfConventionalNames(const Expr * E)432 bool mentionsAnyOfConventionalNames(const Expr *E) {
433 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E);
434
435 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) {
436 return llvm::any_of(
437 CONVENTIONAL_CONDITIONS,
438 [ConditionName](const llvm::StringLiteral &Conventional) {
439 return ConditionName.contains_lower(Conventional);
440 });
441 });
442 }
443
444 /// Clarification is a simple pair of a reason why parameter is not called
445 /// on every path and a statement to blame.
446 struct Clarification {
447 NeverCalledReason Reason;
448 const Stmt *Location;
449 };
450
451 /// A helper class that can produce a clarification based on the given pair
452 /// of basic blocks.
453 class NotCalledClarifier
454 : public ConstStmtVisitor<NotCalledClarifier,
455 llvm::Optional<Clarification>> {
456 public:
457 /// The main entrypoint for the class, the function that tries to find the
458 /// clarification of how to explain which sub-path starts with a CFG edge
459 /// from Conditional to SuccWithoutCall.
460 ///
461 /// This means that this function has one precondition:
462 /// SuccWithoutCall should be a successor block for Conditional.
463 ///
464 /// Because clarification is not needed for non-trivial pairs of blocks
465 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful
466 /// results only for such cases. For this very reason, the parent basic
467 /// block, Conditional, is named that way, so it is clear what kind of
468 /// block is expected.
469 static llvm::Optional<Clarification>
clarify(const CFGBlock * Conditional,const CFGBlock * SuccWithoutCall)470 clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) {
471 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) {
472 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator);
473 }
474 return llvm::None;
475 }
476
VisitIfStmt(const IfStmt * If)477 llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) {
478 return VisitBranchingBlock(If, NeverCalledReason::IfThen);
479 }
480
481 llvm::Optional<Clarification>
VisitAbstractConditionalOperator(const AbstractConditionalOperator * Ternary)482 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) {
483 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen);
484 }
485
VisitSwitchStmt(const SwitchStmt * Switch)486 llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) {
487 const Stmt *CaseToBlame = SuccInQuestion->getLabel();
488 if (!CaseToBlame) {
489 // If interesting basic block is not labeled, it means that this
490 // basic block does not represent any of the cases.
491 return Clarification{NeverCalledReason::SwitchSkipped, Switch};
492 }
493
494 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case;
495 Case = Case->getNextSwitchCase()) {
496 if (Case == CaseToBlame) {
497 return Clarification{NeverCalledReason::Switch, Case};
498 }
499 }
500
501 llvm_unreachable("Found unexpected switch structure");
502 }
503
VisitForStmt(const ForStmt * For)504 llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) {
505 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered);
506 }
507
VisitWhileStmt(const WhileStmt * While)508 llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) {
509 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered);
510 }
511
512 llvm::Optional<Clarification>
VisitBranchingBlock(const Stmt * Terminator,NeverCalledReason DefaultReason)513 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) {
514 assert(Parent->succ_size() == 2 &&
515 "Branching block should have exactly two successors");
516 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion);
517 NeverCalledReason ActualReason =
518 updateForSuccessor(DefaultReason, SuccessorIndex);
519 return Clarification{ActualReason, Terminator};
520 }
521
VisitBinaryOperator(const BinaryOperator *)522 llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) {
523 // We don't want to report on short-curcuit logical operations.
524 return llvm::None;
525 }
526
VisitStmt(const Stmt * Terminator)527 llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) {
528 // If we got here, we didn't have a visit function for more derived
529 // classes of statement that this terminator actually belongs to.
530 //
531 // This is not a good scenario and should not happen in practice, but
532 // at least we'll warn the user.
533 return Clarification{NeverCalledReason::FallbackReason, Terminator};
534 }
535
getSuccessorIndex(const CFGBlock * Parent,const CFGBlock * Child)536 static unsigned getSuccessorIndex(const CFGBlock *Parent,
537 const CFGBlock *Child) {
538 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child);
539 assert(It != Parent->succ_end() &&
540 "Given blocks should be in parent-child relationship");
541 return It - Parent->succ_begin();
542 }
543
544 static NeverCalledReason
updateForSuccessor(NeverCalledReason ReasonForTrueBranch,unsigned SuccessorIndex)545 updateForSuccessor(NeverCalledReason ReasonForTrueBranch,
546 unsigned SuccessorIndex) {
547 assert(SuccessorIndex <= 1);
548 unsigned RawReason =
549 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex;
550 assert(RawReason <=
551 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE));
552 return static_cast<NeverCalledReason>(RawReason);
553 }
554
555 private:
NotCalledClarifier(const CFGBlock * Parent,const CFGBlock * SuccInQuestion)556 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion)
557 : Parent(Parent), SuccInQuestion(SuccInQuestion) {}
558
559 const CFGBlock *Parent, *SuccInQuestion;
560 };
561
562 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> {
563 public:
check(AnalysisDeclContext & AC,CalledOnceCheckHandler & Handler,bool CheckConventionalParameters)564 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
565 bool CheckConventionalParameters) {
566 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check();
567 }
568
569 private:
CalledOnceChecker(AnalysisDeclContext & AC,CalledOnceCheckHandler & Handler,bool CheckConventionalParameters)570 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
571 bool CheckConventionalParameters)
572 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler),
573 CheckConventionalParameters(CheckConventionalParameters),
574 CurrentState(0) {
575 initDataStructures();
576 assert((size() == 0 || !States.empty()) &&
577 "Data structures are inconsistent");
578 }
579
580 //===----------------------------------------------------------------------===//
581 // Initializing functions
582 //===----------------------------------------------------------------------===//
583
initDataStructures()584 void initDataStructures() {
585 const Decl *AnalyzedDecl = AC.getDecl();
586
587 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) {
588 findParamsToTrack(Function);
589 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) {
590 findParamsToTrack(Method);
591 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) {
592 findCapturesToTrack(Block);
593 findParamsToTrack(Block);
594 }
595
596 // Have something to track, let's init states for every block from the CFG.
597 if (size() != 0) {
598 States =
599 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size()));
600 }
601 }
602
findCapturesToTrack(const BlockDecl * Block)603 void findCapturesToTrack(const BlockDecl *Block) {
604 for (const auto &Capture : Block->captures()) {
605 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
606 // Parameter DeclContext is its owning function or method.
607 const DeclContext *ParamContext = P->getDeclContext();
608 if (shouldBeCalledOnce(ParamContext, P)) {
609 TrackedParams.push_back(P);
610 }
611 }
612 }
613 }
614
615 template <class FunctionLikeDecl>
findParamsToTrack(const FunctionLikeDecl * Function)616 void findParamsToTrack(const FunctionLikeDecl *Function) {
617 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) {
618 if (shouldBeCalledOnce(Function, Index)) {
619 TrackedParams.push_back(Function->getParamDecl(Index));
620 }
621 }
622 }
623
624 //===----------------------------------------------------------------------===//
625 // Main logic 'check' functions
626 //===----------------------------------------------------------------------===//
627
check()628 void check() {
629 // Nothing to check here: we don't have marked parameters.
630 if (size() == 0 || isPossiblyEmptyImpl())
631 return;
632
633 assert(
634 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) &&
635 "None of the blocks should be 'visited' before the analysis");
636
637 // For our task, both backward and forward approaches suite well.
638 // However, in order to report better diagnostics, we decided to go with
639 // backward analysis.
640 //
641 // Let's consider the following CFG and how forward and backward analyses
642 // will work for it.
643 //
644 // FORWARD: | BACKWARD:
645 // #1 | #1
646 // +---------+ | +-----------+
647 // | if | | |MaybeCalled|
648 // +---------+ | +-----------+
649 // |NotCalled| | | if |
650 // +---------+ | +-----------+
651 // / \ | / \
652 // #2 / \ #3 | #2 / \ #3
653 // +----------------+ +---------+ | +----------------+ +---------+
654 // | foo() | | ... | | |DefinitelyCalled| |NotCalled|
655 // +----------------+ +---------+ | +----------------+ +---------+
656 // |DefinitelyCalled| |NotCalled| | | foo() | | ... |
657 // +----------------+ +---------+ | +----------------+ +---------+
658 // \ / | \ /
659 // \ #4 / | \ #4 /
660 // +-----------+ | +---------+
661 // | ... | | |NotCalled|
662 // +-----------+ | +---------+
663 // |MaybeCalled| | | ... |
664 // +-----------+ | +---------+
665 //
666 // The most natural way to report lacking call in the block #3 would be to
667 // message that the false branch of the if statement in the block #1 doesn't
668 // have a call. And while with the forward approach we'll need to find a
669 // least common ancestor or something like that to find the 'if' to blame,
670 // backward analysis gives it to us out of the box.
671 BackwardDataflowWorklist Worklist(FunctionCFG, AC);
672
673 // Let's visit EXIT.
674 const CFGBlock *Exit = &FunctionCFG.getExit();
675 assignState(Exit, State(size(), ParameterStatus::NotCalled));
676 Worklist.enqueuePredecessors(Exit);
677
678 while (const CFGBlock *BB = Worklist.dequeue()) {
679 assert(BB && "Worklist should filter out null blocks");
680 check(BB);
681 assert(CurrentState.isVisited() &&
682 "After the check, basic block should be visited");
683
684 // Traverse successor basic blocks if the status of this block
685 // has changed.
686 if (assignState(BB, CurrentState)) {
687 Worklist.enqueuePredecessors(BB);
688 }
689 }
690
691 // Check that we have all tracked parameters at the last block.
692 // As we are performing a backward version of the analysis,
693 // it should be the ENTRY block.
694 checkEntry(&FunctionCFG.getEntry());
695 }
696
check(const CFGBlock * BB)697 void check(const CFGBlock *BB) {
698 // We start with a state 'inherited' from all the successors.
699 CurrentState = joinSuccessors(BB);
700 assert(CurrentState.isVisited() &&
701 "Shouldn't start with a 'not visited' state");
702
703 // This is the 'exit' situation, broken promises are probably OK
704 // in such scenarios.
705 if (BB->hasNoReturnElement()) {
706 markNoReturn();
707 // This block still can have calls (even multiple calls) and
708 // for this reason there is no early return here.
709 }
710
711 // We use a backward dataflow propagation and for this reason we
712 // should traverse basic blocks bottom-up.
713 for (const CFGElement &Element : llvm::reverse(*BB)) {
714 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
715 check(S->getStmt());
716 }
717 }
718 }
check(const Stmt * S)719 void check(const Stmt *S) { Visit(S); }
720
checkEntry(const CFGBlock * Entry)721 void checkEntry(const CFGBlock *Entry) {
722 // We finalize this algorithm with the ENTRY block because
723 // we use a backward version of the analysis. This is where
724 // we can judge that some of the tracked parameters are not called on
725 // every path from ENTRY to EXIT.
726
727 const State &EntryStatus = getState(Entry);
728 llvm::BitVector NotCalledOnEveryPath(size(), false);
729 llvm::BitVector NotUsedOnEveryPath(size(), false);
730
731 // Check if there are no calls of the marked parameter at all
732 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) {
733 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
734
735 switch (IndexedStatus.value().getKind()) {
736 case ParameterStatus::NotCalled:
737 // If there were places where this parameter escapes (aka being used),
738 // we can provide a more useful diagnostic by pointing at the exact
739 // branches where it is not even mentioned.
740 if (!hasEverEscaped(IndexedStatus.index())) {
741 // This parameter is was not used at all, so we should report the
742 // most generic version of the warning.
743 if (isCaptured(Parameter)) {
744 // We want to specify that it was captured by the block.
745 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(),
746 !isExplicitlyMarked(Parameter));
747 } else {
748 Handler.handleNeverCalled(Parameter,
749 !isExplicitlyMarked(Parameter));
750 }
751 } else {
752 // Mark it as 'interesting' to figure out which paths don't even
753 // have escapes.
754 NotUsedOnEveryPath[IndexedStatus.index()] = true;
755 }
756
757 break;
758 case ParameterStatus::MaybeCalled:
759 // If we have 'maybe called' at this point, we have an error
760 // that there is at least one path where this parameter
761 // is not called.
762 //
763 // However, reporting the warning with only that information can be
764 // too vague for the users. For this reason, we mark such parameters
765 // as "interesting" for further analysis.
766 NotCalledOnEveryPath[IndexedStatus.index()] = true;
767 break;
768 default:
769 break;
770 }
771 }
772
773 // Early exit if we don't have parameters for extra analysis.
774 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none())
775 return;
776
777 // We are looking for a pair of blocks A, B so that the following is true:
778 // * A is a predecessor of B
779 // * B is marked as NotCalled
780 // * A has at least one successor marked as either
781 // Escaped or DefinitelyCalled
782 //
783 // In that situation, it is guaranteed that B is the first block of the path
784 // where the user doesn't call or use parameter in question.
785 //
786 // For this reason, branch A -> B can be used for reporting.
787 //
788 // This part of the algorithm is guarded by a condition that the function
789 // does indeed have a violation of contract. For this reason, we can
790 // spend more time to find a good spot to place the warning.
791 //
792 // The following algorithm has the worst case complexity of O(V + E),
793 // where V is the number of basic blocks in FunctionCFG,
794 // E is the number of edges between blocks in FunctionCFG.
795 for (const CFGBlock *BB : FunctionCFG) {
796 if (!BB)
797 continue;
798
799 const State &BlockState = getState(BB);
800
801 for (unsigned Index : llvm::seq(0u, size())) {
802 // We don't want to use 'isLosingCall' here because we want to report
803 // the following situation as well:
804 //
805 // MaybeCalled
806 // | ... |
807 // MaybeCalled NotCalled
808 //
809 // Even though successor is not 'DefinitelyCalled', it is still useful
810 // to report it, it is still a path without a call.
811 if (NotCalledOnEveryPath[Index] &&
812 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) {
813
814 findAndReportNotCalledBranches(BB, Index);
815 } else if (NotUsedOnEveryPath[Index] &&
816 isLosingEscape(BlockState, BB, Index)) {
817
818 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true);
819 }
820 }
821 }
822 }
823
824 /// Check potential call of a tracked parameter.
checkDirectCall(const CallExpr * Call)825 void checkDirectCall(const CallExpr *Call) {
826 if (auto Index = getIndexOfCallee(Call)) {
827 processCallFor(*Index, Call);
828 }
829 }
830
831 /// Check the call expression for being an indirect call of one of the tracked
832 /// parameters. It is indirect in the sense that this particular call is not
833 /// calling the parameter itself, but rather uses it as the argument.
834 template <class CallLikeExpr>
checkIndirectCall(const CallLikeExpr * CallOrMessage)835 void checkIndirectCall(const CallLikeExpr *CallOrMessage) {
836 // CallExpr::arguments does not interact nicely with llvm::enumerate.
837 llvm::ArrayRef<const Expr *> Arguments = llvm::makeArrayRef(
838 CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
839
840 // Let's check if any of the call arguments is a point of interest.
841 for (const auto &Argument : llvm::enumerate(Arguments)) {
842 if (auto Index = getIndexOfExpression(Argument.value())) {
843 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
844
845 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) {
846 // If the corresponding parameter is marked as 'called_once' we should
847 // consider it as a call.
848 processCallFor(*Index, CallOrMessage);
849 } else if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) {
850 // Otherwise, we mark this parameter as escaped, which can be
851 // interpreted both as called or not called depending on the context.
852 CurrentParamStatus = ParameterStatus::Escaped;
853 }
854 // Otherwise, let's keep the state as it is.
855 }
856 }
857 }
858
859 /// Process call of the parameter with the given index
processCallFor(unsigned Index,const Expr * Call)860 void processCallFor(unsigned Index, const Expr *Call) {
861 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index);
862
863 if (CurrentParamStatus.seenAnyCalls()) {
864
865 // At this point, this parameter was called, so this is a second call.
866 const ParmVarDecl *Parameter = getParameter(Index);
867 Handler.handleDoubleCall(
868 Parameter, &CurrentState.getCallFor(Index), Call,
869 !isExplicitlyMarked(Parameter),
870 // We are sure that the second call is definitely
871 // going to happen if the status is 'DefinitelyCalled'.
872 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled);
873
874 // Mark this parameter as already reported on, so we don't repeat
875 // warnings.
876 CurrentParamStatus = ParameterStatus::Reported;
877
878 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) {
879 // If we didn't report anything yet, let's mark this parameter
880 // as called.
881 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call);
882 CurrentParamStatus = Called;
883 }
884 }
885
findAndReportNotCalledBranches(const CFGBlock * Parent,unsigned Index,bool IsEscape=false)886 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index,
887 bool IsEscape = false) {
888 for (const CFGBlock *Succ : Parent->succs()) {
889 if (!Succ)
890 continue;
891
892 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) {
893 assert(Parent->succ_size() >= 2 &&
894 "Block should have at least two successors at this point");
895 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) {
896 const ParmVarDecl *Parameter = getParameter(Index);
897 Handler.handleNeverCalled(Parameter, Clarification->Location,
898 Clarification->Reason, !IsEscape,
899 !isExplicitlyMarked(Parameter));
900 }
901 }
902 }
903 }
904
905 //===----------------------------------------------------------------------===//
906 // Predicate functions to check parameters
907 //===----------------------------------------------------------------------===//
908
909 /// Return true if parameter is explicitly marked as 'called_once'.
isExplicitlyMarked(const ParmVarDecl * Parameter)910 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) {
911 return Parameter->hasAttr<CalledOnceAttr>();
912 }
913
914 /// Return true if the given name matches conventional pattens.
isConventional(llvm::StringRef Name)915 static bool isConventional(llvm::StringRef Name) {
916 return llvm::count(CONVENTIONAL_NAMES, Name) != 0;
917 }
918
919 /// Return true if the given name has conventional suffixes.
hasConventionalSuffix(llvm::StringRef Name)920 static bool hasConventionalSuffix(llvm::StringRef Name) {
921 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) {
922 return Name.endswith(Suffix);
923 });
924 }
925
926 /// Return true if the given type can be used for conventional parameters.
isConventional(QualType Ty)927 static bool isConventional(QualType Ty) {
928 if (!Ty->isBlockPointerType()) {
929 return false;
930 }
931
932 QualType BlockType = Ty->getAs<BlockPointerType>()->getPointeeType();
933 // Completion handlers should have a block type with void return type.
934 return BlockType->getAs<FunctionType>()->getReturnType()->isVoidType();
935 }
936
937 /// Return true if the only parameter of the function is conventional.
isOnlyParameterConventional(const FunctionDecl * Function)938 static bool isOnlyParameterConventional(const FunctionDecl *Function) {
939 IdentifierInfo *II = Function->getIdentifier();
940 return Function->getNumParams() == 1 && II &&
941 hasConventionalSuffix(II->getName());
942 }
943
944 /// Return true/false if 'swift_async' attribute states that the given
945 /// parameter is conventionally called once.
946 /// Return llvm::None if the given declaration doesn't have 'swift_async'
947 /// attribute.
isConventionalSwiftAsync(const Decl * D,unsigned ParamIndex)948 static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D,
949 unsigned ParamIndex) {
950 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) {
951 if (A->getKind() == SwiftAsyncAttr::None) {
952 return false;
953 }
954
955 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex;
956 }
957 return llvm::None;
958 }
959
960 /// Return true if the specified selector piece matches conventions.
isConventionalSelectorPiece(Selector MethodSelector,unsigned PieceIndex,QualType PieceType)961 static bool isConventionalSelectorPiece(Selector MethodSelector,
962 unsigned PieceIndex,
963 QualType PieceType) {
964 if (!isConventional(PieceType)) {
965 return false;
966 }
967
968 if (MethodSelector.getNumArgs() == 1) {
969 assert(PieceIndex == 0);
970 return hasConventionalSuffix(MethodSelector.getNameForSlot(0));
971 }
972
973 return isConventional(MethodSelector.getNameForSlot(PieceIndex));
974 }
975
shouldBeCalledOnce(const ParmVarDecl * Parameter) const976 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const {
977 return isExplicitlyMarked(Parameter) ||
978 (CheckConventionalParameters &&
979 isConventional(Parameter->getName()) &&
980 isConventional(Parameter->getType()));
981 }
982
shouldBeCalledOnce(const DeclContext * ParamContext,const ParmVarDecl * Param)983 bool shouldBeCalledOnce(const DeclContext *ParamContext,
984 const ParmVarDecl *Param) {
985 unsigned ParamIndex = Param->getFunctionScopeIndex();
986 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) {
987 return shouldBeCalledOnce(Function, ParamIndex);
988 }
989 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) {
990 return shouldBeCalledOnce(Method, ParamIndex);
991 }
992 return shouldBeCalledOnce(Param);
993 }
994
shouldBeCalledOnce(const BlockDecl * Block,unsigned ParamIndex) const995 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const {
996 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex));
997 }
998
shouldBeCalledOnce(const FunctionDecl * Function,unsigned ParamIndex) const999 bool shouldBeCalledOnce(const FunctionDecl *Function,
1000 unsigned ParamIndex) const {
1001 if (ParamIndex >= Function->getNumParams()) {
1002 return false;
1003 }
1004 // 'swift_async' goes first and overrides anything else.
1005 if (auto ConventionalAsync =
1006 isConventionalSwiftAsync(Function, ParamIndex)) {
1007 return ConventionalAsync.getValue();
1008 }
1009
1010 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) ||
1011 (CheckConventionalParameters &&
1012 isOnlyParameterConventional(Function));
1013 }
1014
shouldBeCalledOnce(const ObjCMethodDecl * Method,unsigned ParamIndex) const1015 bool shouldBeCalledOnce(const ObjCMethodDecl *Method,
1016 unsigned ParamIndex) const {
1017 Selector MethodSelector = Method->getSelector();
1018 if (ParamIndex >= MethodSelector.getNumArgs()) {
1019 return false;
1020 }
1021
1022 // 'swift_async' goes first and overrides anything else.
1023 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) {
1024 return ConventionalAsync.getValue();
1025 }
1026
1027 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex);
1028 return shouldBeCalledOnce(Parameter) ||
1029 (CheckConventionalParameters &&
1030 isConventionalSelectorPiece(MethodSelector, ParamIndex,
1031 Parameter->getType()));
1032 }
1033
shouldBeCalledOnce(const CallExpr * Call,unsigned ParamIndex) const1034 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const {
1035 const FunctionDecl *Function = Call->getDirectCallee();
1036 return Function && shouldBeCalledOnce(Function, ParamIndex);
1037 }
1038
shouldBeCalledOnce(const ObjCMessageExpr * Message,unsigned ParamIndex) const1039 bool shouldBeCalledOnce(const ObjCMessageExpr *Message,
1040 unsigned ParamIndex) const {
1041 const ObjCMethodDecl *Method = Message->getMethodDecl();
1042 return Method && ParamIndex < Method->param_size() &&
1043 shouldBeCalledOnce(Method, ParamIndex);
1044 }
1045
1046 //===----------------------------------------------------------------------===//
1047 // Utility methods
1048 //===----------------------------------------------------------------------===//
1049
isCaptured(const ParmVarDecl * Parameter) const1050 bool isCaptured(const ParmVarDecl *Parameter) const {
1051 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) {
1052 return Block->capturesVariable(Parameter);
1053 }
1054 return false;
1055 }
1056
1057 /// Return true if the analyzed function is actually a default implementation
1058 /// of the method that has to be overriden.
1059 ///
1060 /// These functions can have tracked parameters, but wouldn't call them
1061 /// because they are not designed to perform any meaningful actions.
1062 ///
1063 /// There are a couple of flavors of such default implementations:
1064 /// 1. Empty methods or methods with a single return statement
1065 /// 2. Methods that have one block with a call to no return function
1066 /// 3. Methods with only assertion-like operations
isPossiblyEmptyImpl() const1067 bool isPossiblyEmptyImpl() const {
1068 if (!isa<ObjCMethodDecl>(AC.getDecl())) {
1069 // We care only about functions that are not supposed to be called.
1070 // Only methods can be overriden.
1071 return false;
1072 }
1073
1074 // Case #1 (without return statements)
1075 if (FunctionCFG.size() == 2) {
1076 // Method has only two blocks: ENTRY and EXIT.
1077 // This is equivalent to empty function.
1078 return true;
1079 }
1080
1081 // Case #2
1082 if (FunctionCFG.size() == 3) {
1083 const CFGBlock &Entry = FunctionCFG.getEntry();
1084 if (Entry.succ_empty()) {
1085 return false;
1086 }
1087
1088 const CFGBlock *OnlyBlock = *Entry.succ_begin();
1089 // Method has only one block, let's see if it has a no-return
1090 // element.
1091 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) {
1092 return true;
1093 }
1094 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well.
1095 }
1096
1097 // Cases #1 (return statements) and #3.
1098 //
1099 // It is hard to detect that something is an assertion or came
1100 // from assertion. Here we use a simple heuristic:
1101 //
1102 // - If it came from a macro, it can be an assertion.
1103 //
1104 // Additionally, we can't assume a number of basic blocks or the CFG's
1105 // structure because assertions might include loops and conditions.
1106 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) {
1107 if (!BB) {
1108 // Unreachable blocks are totally fine.
1109 return true;
1110 }
1111
1112 // Return statements can have sub-expressions that are represented as
1113 // separate statements of a basic block. We should allow this.
1114 // This parent map will be initialized with a parent tree for all
1115 // subexpressions of the block's return statement (if it has one).
1116 std::unique_ptr<ParentMap> ReturnChildren;
1117
1118 return llvm::all_of(
1119 llvm::reverse(*BB), // we should start with return statements, if we
1120 // have any, i.e. from the bottom of the block
1121 [&ReturnChildren](const CFGElement &Element) {
1122 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
1123 const Stmt *SuspiciousStmt = S->getStmt();
1124
1125 if (isa<ReturnStmt>(SuspiciousStmt)) {
1126 // Let's initialize this structure to test whether
1127 // some further statement is a part of this return.
1128 ReturnChildren = std::make_unique<ParentMap>(
1129 const_cast<Stmt *>(SuspiciousStmt));
1130 // Return statements are allowed as part of #1.
1131 return true;
1132 }
1133
1134 return SuspiciousStmt->getBeginLoc().isMacroID() ||
1135 (ReturnChildren &&
1136 ReturnChildren->hasParent(SuspiciousStmt));
1137 }
1138 return true;
1139 });
1140 });
1141 }
1142
1143 /// Check if parameter with the given index has ever escaped.
hasEverEscaped(unsigned Index) const1144 bool hasEverEscaped(unsigned Index) const {
1145 return llvm::any_of(States, [Index](const State &StateForOneBB) {
1146 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped;
1147 });
1148 }
1149
1150 /// Return status stored for the given basic block.
1151 /// \{
getState(const CFGBlock * BB)1152 State &getState(const CFGBlock *BB) {
1153 assert(BB);
1154 return States[BB->getBlockID()];
1155 }
getState(const CFGBlock * BB) const1156 const State &getState(const CFGBlock *BB) const {
1157 assert(BB);
1158 return States[BB->getBlockID()];
1159 }
1160 /// \}
1161
1162 /// Assign status to the given basic block.
1163 ///
1164 /// Returns true when the stored status changed.
assignState(const CFGBlock * BB,const State & ToAssign)1165 bool assignState(const CFGBlock *BB, const State &ToAssign) {
1166 State &Current = getState(BB);
1167 if (Current == ToAssign) {
1168 return false;
1169 }
1170
1171 Current = ToAssign;
1172 return true;
1173 }
1174
1175 /// Join all incoming statuses for the given basic block.
joinSuccessors(const CFGBlock * BB) const1176 State joinSuccessors(const CFGBlock *BB) const {
1177 auto Succs =
1178 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) {
1179 return Succ && this->getState(Succ).isVisited();
1180 });
1181 // We came to this block from somewhere after all.
1182 assert(!Succs.empty() &&
1183 "Basic block should have at least one visited successor");
1184
1185 State Result = getState(*Succs.begin());
1186
1187 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) {
1188 Result.join(getState(Succ));
1189 }
1190
1191 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) {
1192 handleConditional(BB, Condition, Result);
1193 }
1194
1195 return Result;
1196 }
1197
handleConditional(const CFGBlock * BB,const Expr * Condition,State & ToAlter) const1198 void handleConditional(const CFGBlock *BB, const Expr *Condition,
1199 State &ToAlter) const {
1200 handleParameterCheck(BB, Condition, ToAlter);
1201 if (SuppressOnConventionalErrorPaths) {
1202 handleConventionalCheck(BB, Condition, ToAlter);
1203 }
1204 }
1205
handleParameterCheck(const CFGBlock * BB,const Expr * Condition,State & ToAlter) const1206 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition,
1207 State &ToAlter) const {
1208 // In this function, we try to deal with the following pattern:
1209 //
1210 // if (parameter)
1211 // parameter(...);
1212 //
1213 // It's not good to show a warning here because clearly 'parameter'
1214 // couldn't and shouldn't be called on the 'else' path.
1215 //
1216 // Let's check if this if statement has a check involving one of
1217 // the tracked parameters.
1218 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(
1219 Condition,
1220 /* ShouldRetrieveFromComparisons = */ true)) {
1221 if (const auto Index = getIndex(*Parameter)) {
1222 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index);
1223
1224 // We don't want to deep dive into semantics of the check and
1225 // figure out if that check was for null or something else.
1226 // We simply trust the user that they know what they are doing.
1227 //
1228 // For this reason, in the following loop we look for the
1229 // best-looking option.
1230 for (const CFGBlock *Succ : BB->succs()) {
1231 if (!Succ)
1232 continue;
1233
1234 const ParameterStatus &StatusInSucc =
1235 getState(Succ).getStatusFor(*Index);
1236
1237 if (StatusInSucc.isErrorStatus()) {
1238 continue;
1239 }
1240
1241 // Let's use this status instead.
1242 CurrentStatus = StatusInSucc;
1243
1244 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) {
1245 // This is the best option to have and we already found it.
1246 break;
1247 }
1248
1249 // If we found 'Escaped' first, we still might find 'DefinitelyCalled'
1250 // on the other branch. And we prefer the latter.
1251 }
1252 }
1253 }
1254 }
1255
handleConventionalCheck(const CFGBlock * BB,const Expr * Condition,State & ToAlter) const1256 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition,
1257 State &ToAlter) const {
1258 // Even when the analysis is technically correct, it is a widespread pattern
1259 // not to call completion handlers in some scenarios. These usually have
1260 // typical conditional names, such as 'error' or 'cancel'.
1261 if (!mentionsAnyOfConventionalNames(Condition)) {
1262 return;
1263 }
1264
1265 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) {
1266 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
1267 // Conventions do not apply to explicitly marked parameters.
1268 if (isExplicitlyMarked(Parameter)) {
1269 continue;
1270 }
1271
1272 ParameterStatus &CurrentStatus = IndexedStatus.value();
1273 // If we did find that on one of the branches the user uses the callback
1274 // and doesn't on the other path, we believe that they know what they are
1275 // doing and trust them.
1276 //
1277 // There are two possible scenarios for that:
1278 // 1. Current status is 'MaybeCalled' and one of the branches is
1279 // 'DefinitelyCalled'
1280 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped'
1281 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) ||
1282 isLosingEscape(ToAlter, BB, IndexedStatus.index())) {
1283 CurrentStatus = ParameterStatus::Escaped;
1284 }
1285 }
1286 }
1287
isLosingCall(const State & StateAfterJoin,const CFGBlock * JoinBlock,unsigned ParameterIndex) const1288 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1289 unsigned ParameterIndex) const {
1290 // Let's check if the block represents DefinitelyCalled -> MaybeCalled
1291 // transition.
1292 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1293 ParameterStatus::MaybeCalled,
1294 ParameterStatus::DefinitelyCalled);
1295 }
1296
isLosingEscape(const State & StateAfterJoin,const CFGBlock * JoinBlock,unsigned ParameterIndex) const1297 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1298 unsigned ParameterIndex) const {
1299 // Let's check if the block represents Escaped -> NotCalled transition.
1300 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1301 ParameterStatus::NotCalled, ParameterStatus::Escaped);
1302 }
1303
isLosingJoin(const State & StateAfterJoin,const CFGBlock * JoinBlock,unsigned ParameterIndex,ParameterStatus::Kind AfterJoin,ParameterStatus::Kind BeforeJoin) const1304 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1305 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin,
1306 ParameterStatus::Kind BeforeJoin) const {
1307 assert(!ParameterStatus::isErrorStatus(BeforeJoin) &&
1308 ParameterStatus::isErrorStatus(AfterJoin) &&
1309 "It's not a losing join if statuses do not represent "
1310 "correct-to-error transition");
1311
1312 const ParameterStatus &CurrentStatus =
1313 StateAfterJoin.getStatusFor(ParameterIndex);
1314
1315 return CurrentStatus.getKind() == AfterJoin &&
1316 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin);
1317 }
1318
1319 /// Return true if any of the successors of the given basic block has
1320 /// a specified status for the given parameter.
anySuccessorHasStatus(const CFGBlock * Parent,unsigned ParameterIndex,ParameterStatus::Kind ToFind) const1321 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex,
1322 ParameterStatus::Kind ToFind) const {
1323 return llvm::any_of(
1324 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) {
1325 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind;
1326 });
1327 }
1328
1329 /// Check given expression that was discovered to escape.
checkEscapee(const Expr * E)1330 void checkEscapee(const Expr *E) {
1331 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1332 checkEscapee(*Parameter);
1333 }
1334 }
1335
1336 /// Check given parameter that was discovered to escape.
checkEscapee(const ParmVarDecl & Parameter)1337 void checkEscapee(const ParmVarDecl &Parameter) {
1338 if (auto Index = getIndex(Parameter)) {
1339 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
1340
1341 if (CurrentParamStatus.getKind() == ParameterStatus::NotCalled) {
1342 CurrentParamStatus = ParameterStatus::Escaped;
1343 }
1344 }
1345 }
1346
1347 /// Mark all parameters in the current state as 'no-return'.
markNoReturn()1348 void markNoReturn() {
1349 for (ParameterStatus &PS : CurrentState) {
1350 PS = ParameterStatus::NoReturn;
1351 }
1352 }
1353
1354 /// Check if the given assignment represents suppression and act on it.
checkSuppression(const BinaryOperator * Assignment)1355 void checkSuppression(const BinaryOperator *Assignment) {
1356 // Suppression has the following form:
1357 // parameter = 0;
1358 // 0 can be of any form (NULL, nil, etc.)
1359 if (auto Index = getIndexOfExpression(Assignment->getLHS())) {
1360
1361 // We don't care what is written in the RHS, it could be whatever
1362 // we can interpret as 0.
1363 if (auto Constant =
1364 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr(
1365 AC.getASTContext())) {
1366
1367 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
1368
1369 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) {
1370 // Even though this suppression mechanism is introduced to tackle
1371 // false positives for multiple calls, the fact that the user has
1372 // to use suppression can also tell us that we couldn't figure out
1373 // how different paths cancel each other out. And if that is true,
1374 // we will most certainly have false positives about parameters not
1375 // being called on certain paths.
1376 //
1377 // For this reason, we abandon tracking this parameter altogether.
1378 CurrentParamStatus = ParameterStatus::Reported;
1379 }
1380 }
1381 }
1382 }
1383
1384 public:
1385 //===----------------------------------------------------------------------===//
1386 // Tree traversal methods
1387 //===----------------------------------------------------------------------===//
1388
VisitCallExpr(const CallExpr * Call)1389 void VisitCallExpr(const CallExpr *Call) {
1390 // This call might be a direct call, i.e. a parameter call...
1391 checkDirectCall(Call);
1392 // ... or an indirect call, i.e. when parameter is an argument.
1393 checkIndirectCall(Call);
1394 }
1395
VisitObjCMessageExpr(const ObjCMessageExpr * Message)1396 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) {
1397 // The most common situation that we are defending against here is
1398 // copying a tracked parameter.
1399 if (const Expr *Receiver = Message->getInstanceReceiver()) {
1400 checkEscapee(Receiver);
1401 }
1402 // Message expressions unlike calls, could not be direct.
1403 checkIndirectCall(Message);
1404 }
1405
VisitBlockExpr(const BlockExpr * Block)1406 void VisitBlockExpr(const BlockExpr *Block) {
1407 for (const auto &Capture : Block->getBlockDecl()->captures()) {
1408 // If a block captures a tracked parameter, it should be
1409 // considered escaped.
1410 // On one hand, blocks that do that should definitely call it on
1411 // every path. However, it is not guaranteed that the block
1412 // itself gets called whenever it gets created.
1413 //
1414 // Because we don't want to track blocks and whether they get called,
1415 // we consider such parameters simply escaped.
1416 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
1417 checkEscapee(*Param);
1418 }
1419 }
1420 }
1421
VisitBinaryOperator(const BinaryOperator * Op)1422 void VisitBinaryOperator(const BinaryOperator *Op) {
1423 if (Op->getOpcode() == clang::BO_Assign) {
1424 // Let's check if one of the tracked parameters is assigned into
1425 // something, and if it is we don't want to track extra variables, so we
1426 // consider it as an escapee.
1427 checkEscapee(Op->getRHS());
1428
1429 // Let's check whether this assignment is a suppression.
1430 checkSuppression(Op);
1431 }
1432 }
1433
VisitDeclStmt(const DeclStmt * DS)1434 void VisitDeclStmt(const DeclStmt *DS) {
1435 // Variable initialization is not assignment and should be handled
1436 // separately.
1437 //
1438 // Multiple declarations can be a part of declaration statement.
1439 for (const auto *Declaration : DS->getDeclGroup()) {
1440 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) {
1441 if (Var->getInit()) {
1442 checkEscapee(Var->getInit());
1443 }
1444 }
1445 }
1446 }
1447
VisitCStyleCastExpr(const CStyleCastExpr * Cast)1448 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) {
1449 // We consider '(void)parameter' as a manual no-op escape.
1450 // It should be used to explicitly tell the analysis that this parameter
1451 // is intentionally not called on this path.
1452 if (Cast->getType().getCanonicalType()->isVoidType()) {
1453 checkEscapee(Cast->getSubExpr());
1454 }
1455 }
1456
VisitObjCAtThrowStmt(const ObjCAtThrowStmt *)1457 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) {
1458 // It is OK not to call marked parameters on exceptional paths.
1459 markNoReturn();
1460 }
1461
1462 private:
size() const1463 unsigned size() const { return TrackedParams.size(); }
1464
getIndexOfCallee(const CallExpr * Call) const1465 llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const {
1466 return getIndexOfExpression(Call->getCallee());
1467 }
1468
getIndexOfExpression(const Expr * E) const1469 llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const {
1470 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1471 return getIndex(*Parameter);
1472 }
1473
1474 return llvm::None;
1475 }
1476
getIndex(const ParmVarDecl & Parameter) const1477 llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const {
1478 // Expected number of parameters that we actually track is 1.
1479 //
1480 // Also, the maximum number of declared parameters could not be on a scale
1481 // of hundreds of thousands.
1482 //
1483 // In this setting, linear search seems reasonable and even performs better
1484 // than bisection.
1485 ParamSizedVector<const ParmVarDecl *>::const_iterator It =
1486 llvm::find(TrackedParams, &Parameter);
1487
1488 if (It != TrackedParams.end()) {
1489 return It - TrackedParams.begin();
1490 }
1491
1492 return llvm::None;
1493 }
1494
getParameter(unsigned Index) const1495 const ParmVarDecl *getParameter(unsigned Index) const {
1496 assert(Index < TrackedParams.size());
1497 return TrackedParams[Index];
1498 }
1499
1500 const CFG &FunctionCFG;
1501 AnalysisDeclContext &AC;
1502 CalledOnceCheckHandler &Handler;
1503 bool CheckConventionalParameters;
1504 // As of now, we turn this behavior off. So, we still are going to report
1505 // missing calls on paths that look like it was intentional.
1506 // Technically such reports are true positives, but they can make some users
1507 // grumpy because of the sheer number of warnings.
1508 // It can be turned back on if we decide that we want to have the other way
1509 // around.
1510 bool SuppressOnConventionalErrorPaths = false;
1511
1512 State CurrentState;
1513 ParamSizedVector<const ParmVarDecl *> TrackedParams;
1514 CFGSizedVector<State> States;
1515 };
1516
1517 } // end anonymous namespace
1518
1519 namespace clang {
checkCalledOnceParameters(AnalysisDeclContext & AC,CalledOnceCheckHandler & Handler,bool CheckConventionalParameters)1520 void checkCalledOnceParameters(AnalysisDeclContext &AC,
1521 CalledOnceCheckHandler &Handler,
1522 bool CheckConventionalParameters) {
1523 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters);
1524 }
1525 } // end namespace clang
1526