1 //===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SuspiciousMissingCommaCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 
13 using namespace clang::ast_matchers;
14 
15 namespace clang {
16 namespace tidy {
17 namespace bugprone {
18 
19 namespace {
20 
isConcatenatedLiteralsOnPurpose(ASTContext * Ctx,const StringLiteral * Lit)21 bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
22                                      const StringLiteral *Lit) {
23   // String literals surrounded by parentheses are assumed to be on purpose.
24   //    i.e.:  const char* Array[] = { ("a" "b" "c"), "d", [...] };
25 
26   TraversalKindScope RAII(*Ctx, TK_AsIs);
27   auto Parents = Ctx->getParents(*Lit);
28   if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
29     return true;
30 
31   // Appropriately indented string literals are assumed to be on purpose.
32   // The following frequent indentation is accepted:
33   //     const char* Array[] = {
34   //       "first literal"
35   //           "indented literal"
36   //           "indented literal",
37   //       "second literal",
38   //       [...]
39   //     };
40   const SourceManager &SM = Ctx->getSourceManager();
41   bool IndentedCorrectly = true;
42   SourceLocation FirstToken = Lit->getStrTokenLoc(0);
43   FileID BaseFID = SM.getFileID(FirstToken);
44   unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
45   unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
46   for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) {
47     SourceLocation Token = Lit->getStrTokenLoc(TokNum);
48     FileID FID = SM.getFileID(Token);
49     unsigned int Indent = SM.getSpellingColumnNumber(Token);
50     unsigned int Line = SM.getSpellingLineNumber(Token);
51     if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
52       IndentedCorrectly = false;
53       break;
54     }
55   }
56   if (IndentedCorrectly)
57     return true;
58 
59   // There is no pattern recognized by the checker, assume it's not on purpose.
60   return false;
61 }
62 
/// Matches a string literal that is built from more than one token but fewer
/// than \p MaxConcatenatedTokens tokens, and whose concatenation is not
/// recognized as intentional by isConcatenatedLiteralsOnPurpose().
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
              MaxConcatenatedTokens) {
  const unsigned NumTokens = Node.getNumConcatenated();

  // Reject plain literals and literals at or above the token limit before
  // running the layout heuristic.
  if (NumTokens <= 1 || NumTokens >= MaxConcatenatedTokens)
    return false;

  return !isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
}
69 
70 } // namespace
71 
/// Constructs the check and reads its three tunables through Options.get().
/// The second argument of each call (5U, ".2", 5U) is presumably the value
/// used when the option is not configured.
///
/// NOTE(review): "RatioThreshold" is fetched as text and converted with
/// std::stod, which reports malformed input by throwing; confirm how an
/// invalid configuration value is expected to be handled.
SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
    StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      // Initializer lists with fewer elements than this are skipped by check().
      SizeThreshold(Options.get("SizeThreshold", 5U)),
      // check() stays silent when the share of concatenated literals in the
      // list exceeds this ratio.
      RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))),
      // Exclusive upper bound on the token count of a literal that the
      // isConcatenatedLiteral matcher accepts.
      MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
78 
storeOptions(ClangTidyOptions::OptionMap & Opts)79 void SuspiciousMissingCommaCheck::storeOptions(
80     ClangTidyOptions::OptionMap &Opts) {
81   Options.store(Opts, "SizeThreshold", SizeThreshold);
82   Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
83   Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
84 }
85 
registerMatchers(MatchFinder * Finder)86 void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
87   const auto ConcatenatedStringLiteral =
88       stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
89 
90   const auto StringsInitializerList =
91       initListExpr(hasType(constantArrayType()),
92                    has(ignoringParenImpCasts(expr(ConcatenatedStringLiteral))));
93 
94   Finder->addMatcher(StringsInitializerList.bind("list"), this);
95 }
96 
check(const MatchFinder::MatchResult & Result)97 void SuspiciousMissingCommaCheck::check(
98     const MatchFinder::MatchResult &Result) {
99   const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
100   const auto *ConcatenatedLiteral =
101       Result.Nodes.getNodeAs<StringLiteral>("str");
102   assert(InitializerList && ConcatenatedLiteral);
103 
104   // Skip small arrays as they often generate false-positive.
105   unsigned int Size = InitializerList->getNumInits();
106   if (Size < SizeThreshold)
107     return;
108 
109   // Count the number of occurrence of concatenated string literal.
110   unsigned int Count = 0;
111   for (unsigned int I = 0; I < Size; ++I) {
112     const Expr *Child = InitializerList->getInit(I)->IgnoreImpCasts();
113     if (const auto *Literal = dyn_cast<StringLiteral>(Child)) {
114       if (Literal->getNumConcatenated() > 1)
115         ++Count;
116     }
117   }
118 
119   // Warn only when concatenation is not common in this initializer list.
120   // The current threshold is set to less than 1/5 of the string literals.
121   if (double(Count) / Size > RatioThreshold)
122     return;
123 
124   diag(ConcatenatedLiteral->getBeginLoc(),
125        "suspicious string literal, probably missing a comma");
126 }
127 
128 } // namespace bugprone
129 } // namespace tidy
130 } // namespace clang
131