1 /*
2  * goostring-format-checker.cc
3  *
4  * This file is licensed under the GPLv2 or later
5  *
6  * Clang++ compiler plugin that checks usage of GooString::format-like functions
7  *
8  * Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
9  * Copyright (C) 2021 Albert Astals Cid <aacid@kde.org>
10  */
11 
#include <cctype>
#include <climits>
#include <memory>
#include <string>
#include <vector>

#include <clang/AST/AST.h>
#include <clang/AST/ASTConsumer.h>
#include <clang/AST/Attr.h>
#include <clang/AST/RecursiveASTVisitor.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/FrontendPluginRegistry.h>
20 
21 using namespace clang;
22 
23 namespace {
24 
25 class GooStringFormatCheckerVisitor : public RecursiveASTVisitor<GooStringFormatCheckerVisitor>
26 {
27 public:
28     explicit GooStringFormatCheckerVisitor(CompilerInstance *compInst);
29 
30     bool VisitFunctionDecl(FunctionDecl *funcDecl);
31     bool VisitCallExpr(CallExpr *callExpr);
32 
33 private:
34     /* Returns the index of the format argument, or -1 if the function must
35      * not be checked */
36     int findFormatArgumentIndex(const FunctionDecl *funcDecl) const;
37 
38     /* Returns the SourceLocation of the n-th character */
39     SourceLocation getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n);
40 
41     /* Validates usage of a placeholder and returns the corresponding
42      * argument index, or -1 in case of errors */
43     int verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, std::string &placeholderText, int baseArgIdx) const;
44 
45     CompilerInstance *compInst;
46     DiagnosticsEngine *diag;
47     unsigned diag_badFuncZeroArgs;
48     unsigned diag_badFuncNonVariadic;
49     unsigned diag_badFuncLastArgInvalidType;
50     unsigned diag_notStringLiteral;
51     unsigned diag_notPlainASCII;
52     unsigned diag_wrongOrder;
53     unsigned diag_unescapedBracket;
54     unsigned diag_unterminatedPlaceholder;
55     unsigned diag_unconsumedArgs;
56     unsigned diag_missingColon;
57     unsigned diag_missingArgNumber;
58     unsigned diag_badArgNumber;
59     unsigned diag_argumentNotPresent;
60     unsigned diag_badPrecision;
61     unsigned diag_badType;
62     unsigned diag_wrongArgExprType;
63 };
64 
GooStringFormatCheckerVisitor(CompilerInstance * compInst)65 GooStringFormatCheckerVisitor::GooStringFormatCheckerVisitor(CompilerInstance *compInst) : compInst(compInst)
66 {
67     diag = &compInst->getDiagnostics();
68 
69     diag_badFuncZeroArgs = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a function that takes no arguments");
70     diag_badFuncNonVariadic = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks on a non-variadic function");
71     diag_badFuncLastArgInvalidType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Cannot enforce format string checks if the last non-variadic argument is not const char *");
72     diag_notStringLiteral = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string is not a string literal. Skipping format checks");
73     diag_notPlainASCII = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Format string contains non-ASCII or NUL characters. Skipping format checks");
74     diag_wrongOrder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument %0 must be consumed before argument %1");
75     diag_unescapedBracket = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unescaped '}' character");
76     diag_unterminatedPlaceholder = diag->getCustomDiagID(DiagnosticsEngine::Error, "Unterminated placeholder");
77     diag_unconsumedArgs = diag->getCustomDiagID(DiagnosticsEngine::Warning, "Unconsumed argument(s)");
78     diag_missingColon = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing colon character");
79     diag_missingArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': missing <arg> number");
80     diag_badArgNumber = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <arg> number");
81     diag_argumentNotPresent = diag->getCustomDiagID(DiagnosticsEngine::Error, "Argument for placeholder '{%0}' is not present");
82     diag_badPrecision = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <precision> value");
83     diag_badType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Invalid placeholder '{%0}': bad <type> specifier");
84     diag_wrongArgExprType = diag->getCustomDiagID(DiagnosticsEngine::Error, "Expected %0 for placeholder '{%1}', found %2");
85 }
86 
VisitFunctionDecl(FunctionDecl * funcDecl)87 bool GooStringFormatCheckerVisitor::VisitFunctionDecl(FunctionDecl *funcDecl)
88 {
89     findFormatArgumentIndex(funcDecl); // Spot misuse of the "gooformat" annotation
90     return true;
91 }
92 
VisitCallExpr(CallExpr * callExpr)93 bool GooStringFormatCheckerVisitor::VisitCallExpr(CallExpr *callExpr)
94 {
95     /*** Locate format argument or skip calls that needn't be checked ***/
96 
97     const int formatArgIdx = findFormatArgumentIndex(callExpr->getDirectCallee());
98     if (formatArgIdx == -1)
99         return true;
100 
101     /*** Obtain format string value ***/
102 
103     const Expr *formatArgExpr = callExpr->getArg(formatArgIdx);
104     while (formatArgExpr->getStmtClass() == Stmt::ImplicitCastExprClass) {
105         formatArgExpr = static_cast<const ImplicitCastExpr *>(formatArgExpr)->getSubExpr();
106     }
107     if (formatArgExpr->getStmtClass() != Stmt::StringLiteralClass) {
108         diag->Report(formatArgExpr->getExprLoc(), diag_notStringLiteral);
109         return true;
110     }
111     const StringLiteral *formatArgStrLiteral = static_cast<const StringLiteral *>(formatArgExpr);
112     if (formatArgStrLiteral->containsNonAsciiOrNull()) {
113         diag->Report(formatArgExpr->getExprLoc(), diag_notPlainASCII);
114         return true;
115     }
116 
117     /*** Parse format string and verify arguments ***/
118 
119     const std::string format = formatArgStrLiteral->getString().str();
120 
121     /* Keeps track of whether we are currently parsing a character contained
122      * within '{' ... '}'. If set, current_placeholder contains the contents
123      * parsed so far (without brackets) */
124     bool in_placeholder = false;
125     std::string current_placeholder;
126 
127     // Source location of the current placeholder's opening bracket
128     SourceLocation placeholderLoc;
129 
130     /* Keeps track of the next expected argument number, to check that
131      * arguments are first consumed in order (eg {0:d}{2:d}{1:d} is wrong).
132      * Note that it's possible to "look back" at already consumed
133      * arguments (eg {0:d}{1:d}{0:d} is OK) */
134     int nextExpectedArgNum = 0;
135 
136     for (unsigned i = 0; i < format.length(); i++) {
137         if (in_placeholder) {
138             // Have we reached the end of the placeholder?
139             if (format[i] == '}') {
140                 in_placeholder = false;
141 
142                 // Verifies the placeholder and returns the argument number
143                 const int foundArgNum = verifyPlaceholder(callExpr, placeholderLoc, current_placeholder, formatArgIdx + 1);
144 
145                 // If the placeholder wasn't valid, disable argument order checks
146                 if (foundArgNum == -1) {
147                     nextExpectedArgNum = -1;
148                 }
149 
150                 // If argument order checks are enabled, let's check!
151                 if (nextExpectedArgNum != -1) {
152                     if (foundArgNum == nextExpectedArgNum) {
153                         nextExpectedArgNum++;
154                     } else if (foundArgNum > nextExpectedArgNum) {
155                         diag->Report(placeholderLoc, diag_wrongOrder) << nextExpectedArgNum << foundArgNum;
156                         nextExpectedArgNum = -1; // disable further checks
157                     }
158                 }
159             } else {
160                 current_placeholder += format[i];
161             }
162         } else if (format[i] == '{') {
163             // If we find a '{' then a placeholder is starting...
164             in_placeholder = true;
165             current_placeholder = "";
166             placeholderLoc = getLocationOfCharacter(formatArgStrLiteral, i);
167 
168             // ...unless it's followed by another '{' (escape sequence)
169             if (i + 1 < format.length() && format[i + 1] == '{') {
170                 i++; // skip next '{' character
171                 in_placeholder = false;
172             }
173         } else if (format[i] == '}') {
174             /* If we have found a '}' and we're not in a placeholder,
175              * then it *MUST* be followed by another '}' (escape sequence) */
176             if (i + 1 >= format.length() || format[i + 1] != '}') {
177                 diag->Report(getLocationOfCharacter(formatArgStrLiteral, i), diag_unescapedBracket);
178             } else {
179                 i++; // skip next '}' character
180             }
181         }
182     }
183 
184     /* If we've reached the end of the format string and in_placeholder is
185      * still set, then the last placeholder wasn't terminated properly */
186     if (in_placeholder)
187         diag->Report(placeholderLoc, diag_unterminatedPlaceholder);
188 
189     int unconsumedArgs = callExpr->getNumArgs() - (formatArgIdx + 1 + nextExpectedArgNum);
190     if (unconsumedArgs > 0)
191         diag->Report(callExpr->getArg(callExpr->getNumArgs() - unconsumedArgs)->getExprLoc(), diag_unconsumedArgs);
192 
193     return true;
194 }
195 
findFormatArgumentIndex(const FunctionDecl * funcDecl) const196 int GooStringFormatCheckerVisitor::findFormatArgumentIndex(const FunctionDecl *funcDecl) const
197 {
198     if (!funcDecl)
199         return -1;
200 
201     AnnotateAttr *annotation = NULL;
202     for (specific_attr_iterator<AnnotateAttr> it = funcDecl->specific_attr_begin<AnnotateAttr>(); it != funcDecl->specific_attr_end<AnnotateAttr>() && !annotation; ++it) {
203         if (it->getAnnotation() == "gooformat")
204             annotation = *it;
205     }
206 
207     // If this function hasn't got the "gooformat" annotation on it
208     if (!annotation)
209         return -1;
210 
211     if (funcDecl->getNumParams() == 0) {
212         diag->Report(annotation->getLocation(), diag_badFuncZeroArgs);
213         return -1;
214     }
215 
216     if (!funcDecl->isVariadic()) {
217         diag->Report(annotation->getLocation(), diag_badFuncNonVariadic);
218         return -1;
219     }
220 
221     // Assume the last non-variadic argument is the format specifier
222     const int formatArgIdx = funcDecl->getNumParams() - 1;
223     const QualType formatArgType = funcDecl->getParamDecl(formatArgIdx)->getType();
224     if (formatArgType.getAsString() != "const char *") {
225         diag->Report(annotation->getLocation(), diag_badFuncLastArgInvalidType);
226         return -1;
227     }
228 
229     return formatArgIdx;
230 }
231 
getLocationOfCharacter(const StringLiteral * strLiteral,unsigned n)232 SourceLocation GooStringFormatCheckerVisitor::getLocationOfCharacter(const StringLiteral *strLiteral, unsigned n)
233 {
234     return strLiteral->getLocationOfByte(n, compInst->getSourceManager(), compInst->getLangOpts(), compInst->getTarget());
235 }
236 
verifyPlaceholder(const CallExpr * callExpr,const SourceLocation & placeholderLocation,std::string & placeholderText,int baseArgIdx) const237 int GooStringFormatCheckerVisitor::verifyPlaceholder(const CallExpr *callExpr, const SourceLocation &placeholderLocation, std::string &placeholderText, int baseArgIdx) const
238 {
239     // Find the colon that separates the argument number and the format specifier
240     const size_t delim = placeholderText.find(':');
241     if (delim == std::string::npos) {
242         diag->Report(placeholderLocation, diag_missingColon) << placeholderText;
243         return -1;
244     }
245     if (delim == 0) {
246         diag->Report(placeholderLocation, diag_missingArgNumber) << placeholderText;
247         return -1;
248     }
249     for (unsigned int i = 0; i < delim; i++) {
250         if (!isdigit(placeholderText[i])) {
251             diag->Report(placeholderLocation, diag_badArgNumber) << placeholderText;
252             return -1;
253         }
254     }
255 
256     // Extract argument number and its actual position in the call's argument list
257     const int argNum = atoi(placeholderText.substr(0, delim).c_str());
258     const int argIdx = baseArgIdx + argNum;
259     if (argIdx >= callExpr->getNumArgs()) {
260         diag->Report(placeholderLocation, diag_argumentNotPresent) << placeholderText;
261         return argNum;
262     }
263 
264     // Check and strip width/precision specifiers
265     std::string format = placeholderText.substr(delim + 1);
266     bool dot_found = false;
267     while (isdigit(format[0]) || format[0] == '.') {
268         if (format[0] == '.') {
269             if (dot_found) {
270                 diag->Report(placeholderLocation, diag_badPrecision) << placeholderText;
271                 return argNum;
272             }
273             dot_found = true;
274         }
275         format = format.substr(1);
276     }
277 
278     const Expr *argExpr = callExpr->getArg(argIdx);
279     const QualType qualType = argExpr->getType();
280     const Type *valueType = qualType->getUnqualifiedDesugaredType();
281 
282     if (format == "d" || format == "x" || format == "X" || format == "o" || format == "b" || format == "w") {
283         if (!valueType->isSpecificBuiltinType(BuiltinType::Int)) {
284             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "int" << placeholderText << qualType.getAsString();
285         }
286     } else if (format == "ud" || format == "ux" || format == "uX" || format == "uo" || format == "ub") {
287         if (!valueType->isSpecificBuiltinType(BuiltinType::UInt)) {
288             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned int" << placeholderText << qualType.getAsString();
289         }
290     } else if (format == "ld" || format == "lx" || format == "lX" || format == "lo" || format == "lb") {
291         if (!valueType->isSpecificBuiltinType(BuiltinType::Long)) {
292             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "long" << placeholderText << qualType.getAsString();
293         }
294     } else if (format == "uld" || format == "ulx" || format == "ulX" || format == "ulo" || format == "ulb") {
295         if (!valueType->isSpecificBuiltinType(BuiltinType::ULong)) {
296             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long" << placeholderText << qualType.getAsString();
297         }
298     } else if (format == "lld" || format == "llx" || format == "llX" || format == "llo" || format == "llb") {
299         if (!valueType->isSpecificBuiltinType(BuiltinType::LongLong)) {
300             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "long long" << placeholderText << qualType.getAsString();
301         }
302     } else if (format == "ulld" || format == "ullx" || format == "ullX" || format == "ullo" || format == "ullb") {
303         if (!valueType->isSpecificBuiltinType(BuiltinType::ULongLong)) {
304             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "unsigned long long" << placeholderText << qualType.getAsString();
305         }
306     } else if (format == "f" || format == "g" || format == "gs") {
307         if (!valueType->isSpecificBuiltinType(BuiltinType::Double)) {
308             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "float or double" << placeholderText << qualType.getAsString();
309         }
310     } else if (format == "c") {
311         if (!valueType->isSpecificBuiltinType(BuiltinType::UInt) && !valueType->isSpecificBuiltinType(BuiltinType::Int)) {
312             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char, short or int" << placeholderText << qualType.getAsString();
313         }
314     } else if (format == "s") {
315         if (!valueType->isPointerType() || !valueType->getPointeeType()->getUnqualifiedDesugaredType()->isCharType()) {
316             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "char *" << placeholderText << qualType.getAsString();
317         }
318     } else if (format == "t") {
319         const CXXRecordDecl *pointeeType = valueType->isPointerType() ? valueType->getPointeeType()->getAsCXXRecordDecl() : 0;
320         if (pointeeType == 0 || pointeeType->getQualifiedNameAsString() != "GooString") {
321             diag->Report(argExpr->getExprLoc(), diag_wrongArgExprType) << "GooString *" << placeholderText << qualType.getAsString();
322         }
323     } else {
324         diag->Report(placeholderLocation, diag_badType) << placeholderText;
325         return argNum;
326     }
327 
328     return argNum;
329 }
330 
331 class GooStringFormatCheckerConsumer : public clang::ASTConsumer
332 {
333 public:
GooStringFormatCheckerConsumer(CompilerInstance * compInst)334     GooStringFormatCheckerConsumer(CompilerInstance *compInst) : visitor(compInst) { }
335 
HandleTranslationUnit(clang::ASTContext & ctx)336     virtual void HandleTranslationUnit(clang::ASTContext &ctx) { visitor.TraverseDecl(ctx.getTranslationUnitDecl()); }
337 
338 private:
339     GooStringFormatCheckerVisitor visitor;
340 };
341 
342 class GooStringFormatCheckerAction : public PluginASTAction
343 {
344 protected:
CreateASTConsumer(CompilerInstance & compInst,llvm::StringRef inFile)345     std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &compInst, llvm::StringRef inFile) { return std::make_unique<GooStringFormatCheckerConsumer>(&compInst); }
346 
ParseArgs(const CompilerInstance & compInst,const std::vector<std::string> & args)347     bool ParseArgs(const CompilerInstance &compInst, const std::vector<std::string> &args)
348     {
349         if (args.size() != 0) {
350             DiagnosticsEngine &D = compInst.getDiagnostics();
351             D.Report(D.getCustomDiagID(DiagnosticsEngine::Error, "goostring-format-checker takes no arguments"));
352             return false;
353         } else {
354             return true;
355         }
356     }
357 };
358 
359 }
360 
361 static FrontendPluginRegistry::Add<GooStringFormatCheckerAction> X("goostring-format-checker", "Checks usage of GooString::format-like functions");
362