1 #include "TranslatorCommentsCheck.h"
2
3 #include <ClangTidyDiagnosticConsumer.h>
4 #include <clang/AST/ASTContext.h>
5 #include <clang/AST/Expr.h>
6 #include <clang/ASTMatchers/ASTMatchers.h>
7 #include <clang/ASTMatchers/ASTMatchersInternal.h>
8 #include <clang/ASTMatchers/ASTMatchersMacros.h>
9 #include <clang/Basic/DiagnosticIDs.h>
10 #include <clang/Basic/IdentifierTable.h>
11 #include <clang/Basic/SourceManager.h>
12 #include <clang/Frontend/CompilerInstance.h>
13 #include <clang/Lex/Lexer.h>
14 #include <clang/Lex/MacroArgs.h>
15 #include <clang/Lex/PPCallbacks.h>
16 #include <clang/Lex/Preprocessor.h>
17 #include <clang/Lex/Token.h>
18 #include <llvm/ADT/STLExtras.h>
19 #include <llvm/Support/Regex.h>
20 #include <iterator>
21 #include <map>
22 #include <utility>
23
24 #include "clang/Basic/TokenKinds.h"
25
26 namespace clang
27 {
28 class CXXConstructExpr;
29 class MacroDefinition;
30 } // namespace clang
31
32 using namespace clang::ast_matchers;
33
34 namespace clang
35 {
36 namespace ast_matchers
37 {
/// Matches a call or construct expression whose N-th argument, after
/// stripping implicit nodes via `IgnoreImplicit()`, satisfies InnerMatcher.
/// Never matches when the node has N or fewer arguments.
AST_POLYMORPHIC_MATCHER_P2( hasImmediateArgument,
                            AST_POLYMORPHIC_SUPPORTED_TYPES( CallExpr, CXXConstructExpr ),
                            unsigned int, N, internal::Matcher<Expr>, InnerMatcher )
{
    // Guard against out-of-range argument indices before touching getArg().
    if( N >= Node.getNumArgs() ) {
        return false;
    }
    const Expr *const Argument = Node.getArg( N )->IgnoreImplicit();
    return InnerMatcher.matches( *Argument, Finder, Builder );
}
45
/// Matches a string literal whose begin location (mapped to its file
/// location) was recorded in `Check->MarkedStrings`.
///
/// Side effect: sets `Check->MatchingStarted`, which the preprocessor hooks
/// in this file read to report an error if they run after matching began.
AST_MATCHER_P( StringLiteral, isMarkedString, tidy::cata::TranslatorCommentsCheck *, Check )
{
    // Builder is not needed by this matcher; reference it up front (instead of
    // in an unreachable statement after the return) to mark it as used.
    static_cast<void>( Builder );

    Check->MatchingStarted = true;
    const SourceManager &SM = Finder->getASTContext().getSourceManager();
    const SourceLocation Loc = SM.getFileLoc( Node.getBeginLoc() );
    return Check->MarkedStrings.find( Loc ) != Check->MarkedStrings.end();
}
54 } // namespace ast_matchers
55 namespace tidy
56 {
57 namespace cata
58 {
59
60 class TranslatorCommentsCheck::TranslatorCommentsHandler : public CommentHandler
61 {
62 public:
TranslatorCommentsHandler(TranslatorCommentsCheck & Check)63 explicit TranslatorCommentsHandler( TranslatorCommentsCheck &Check ) : Check( Check ),
64 // xgettext will treat all comments containing the marker as
65 // translator comments, but we only match those starting with
66 // the marker to allow using the marker inside normal comments
67 Match( "^/[/*]~.*$" ) {}
68
HandleComment(Preprocessor & PP,SourceRange Range)69 bool HandleComment( Preprocessor &PP, SourceRange Range ) override {
70 if( Check.MatchingStarted ) {
71 // according to the standard, all comments are processed before analyzing the syntax
72 Check.diag( Range.getBegin(), "AST Matching started before the end of comment preprocessing",
73 DiagnosticIDs::Error );
74 }
75
76 const SourceManager &SM = PP.getSourceManager();
77 StringRef Text = Lexer::getSourceText( CharSourceRange::getCharRange( Range ),
78 SM, PP.getLangOpts() );
79
80 if( !Match.match( Text ) ) {
81 return false;
82 }
83
84 SourceLocation BegLoc = SM.getFileLoc( Range.getBegin() );
85 SourceLocation EndLoc = SM.getFileLoc( Range.getEnd() );
86 FileID File = SM.getFileID( EndLoc );
87 unsigned int EndLine = SM.getSpellingLineNumber( EndLoc );
88 unsigned int EndCol = SM.getSpellingColumnNumber( EndLoc );
89
90 if( File != SM.getFileID( BegLoc ) ) {
91 Check.diag( BegLoc, "Mysterious multi-file comment", DiagnosticIDs::Error );
92 return false;
93 }
94
95 unsigned int BegLine = SM.getSpellingLineNumber( BegLoc );
96
97 TranslatorComments.emplace( TranslatorCommentLocation { File, EndLine, EndCol },
98 TranslatorComment { BegLoc, BegLine, false } );
99 return false;
100 }
101
102 struct TranslatorCommentLocation {
103 FileID File;
104 unsigned int EndLine;
105 unsigned int EndCol;
106
operator ==clang::tidy::cata::TranslatorCommentsCheck::TranslatorCommentsHandler::TranslatorCommentLocation107 bool operator==( const TranslatorCommentLocation &Other ) const {
108 return File == Other.File && EndLine == Other.EndLine && EndCol == Other.EndCol;
109 }
110
operator <clang::tidy::cata::TranslatorCommentsCheck::TranslatorCommentsHandler::TranslatorCommentLocation111 bool operator<( const TranslatorCommentLocation &Other ) const {
112 if( File != Other.File ) {
113 return File < Other.File;
114 }
115 if( EndLine != Other.EndLine ) {
116 return EndLine < Other.EndLine;
117 }
118 return EndCol < Other.EndCol;
119 }
120 };
121
122 struct TranslatorComment {
123 SourceLocation Beg;
124 unsigned int BegLine;
125 bool Checked;
126 };
127
128 std::map<TranslatorCommentLocation, TranslatorComment> TranslatorComments;
129
130 private:
131 TranslatorCommentsCheck &Check;
132 llvm::Regex Match;
133 };
134
135 class TranslatorCommentsCheck::TranslationMacroCallback : public PPCallbacks
136 {
137 public:
TranslationMacroCallback(TranslatorCommentsCheck & Check,const SourceManager & SM)138 TranslationMacroCallback( TranslatorCommentsCheck &Check, const SourceManager &SM )
139 : Check( Check ), SM( SM ) {}
140
MacroExpands(const Token & MacroNameTok,const MacroDefinition &,SourceRange Range,const MacroArgs * Args)141 void MacroExpands( const Token &MacroNameTok,
142 const MacroDefinition &,
143 SourceRange Range,
144 const MacroArgs *Args ) override {
145 if( Check.MatchingStarted ) {
146 // according to the standard, all macros are expanded before analyzing the syntax
147 Check.diag( Range.getBegin(), "AST Matching started before the end of macro expansion",
148 DiagnosticIDs::Error );
149 }
150
151 StringRef MacroName = MacroNameTok.getIdentifierInfo()->getName();
152
153 bool is_marker;
154 unsigned int RawStringInd;
155 if( MacroName == "_" ) {
156 is_marker = false;
157 RawStringInd = 0;
158 } else if( MacroName == "translate_marker" ) {
159 is_marker = true;
160 RawStringInd = 0;
161 } else if( MacroName == "translate_marker_context" ) {
162 is_marker = true;
163 RawStringInd = 1;
164 } else {
165 return;
166 }
167
168 if( RawStringInd >= Args->getNumMacroArguments() ) {
169 Check.diag( Range.getBegin(), "Translation marker doesn't have expected number of arguments",
170 DiagnosticIDs::Error );
171 }
172
173 // First ensure that translation markers have only string literal arguments
174 for( unsigned int i = 0; i < Args->getNumMacroArguments(); i++ ) {
175 const Token *Tok = Args->getUnexpArgument( i );
176 if( Tok->is( tok::eof ) ) {
177 Check.diag( Tok->getLocation(), "Empty argument to a translation marker macro" );
178 return;
179 }
180 for( ; Tok->isNot( tok::eof ); ++Tok ) {
181 if( !tok::isStringLiteral( Tok->getKind() ) ) {
182 if( is_marker ) {
183 Check.diag( Tok->getLocation(), "Translation marker macros only accepts string literal arguments" );
184 }
185 return;
186 }
187 }
188 }
189
190 const Token *Tok = Args->getUnexpArgument( RawStringInd );
191 Check.MarkedStrings.emplace( SM.getFileLoc( Tok->getLocation() ) );
192 }
193
194 private:
195 TranslatorCommentsCheck &Check;
196 const SourceManager &SM;
197 };
198
TranslatorCommentsCheck(StringRef Name,ClangTidyContext * Context)199 TranslatorCommentsCheck::TranslatorCommentsCheck( StringRef Name, ClangTidyContext *Context )
200 : ClangTidyCheck( Name, Context ),
201 MatchingStarted( false ),
202 Handler( llvm::make_unique<TranslatorCommentsHandler>( *this ) ) {}
203
registerPPCallbacks(CompilerInstance & Compiler)204 void TranslatorCommentsCheck::registerPPCallbacks( CompilerInstance &Compiler )
205 {
206 Compiler.getPreprocessor().addCommentHandler( Handler.get() );
207 Compiler.getPreprocessor().addPPCallbacks(
208 llvm::make_unique<TranslationMacroCallback>( *this, Compiler.getSourceManager() ) );
209 }
210
registerMatchers(MatchFinder * Finder)211 void TranslatorCommentsCheck::registerMatchers( MatchFinder *Finder )
212 {
213 const auto stringLiteralArgumentBound =
214 anyOf(
215 stringLiteral().bind( "RawText" ),
216 cxxConstructExpr(
217 unless( isListInitialization() ),
218 hasImmediateArgument( 0, stringLiteral().bind( "RawText" ) )
219 )
220 );
221 const auto stringLiteralArgumentUnbound =
222 anyOf(
223 stringLiteral(),
224 cxxConstructExpr(
225 unless( isListInitialization() ),
226 hasImmediateArgument( 0, stringLiteral() )
227 )
228 );
229 Finder->addMatcher(
230 callExpr(
231 callee( functionDecl( hasAnyName( "_", "gettext" ) ) ),
232 hasImmediateArgument( 0, stringLiteralArgumentBound )
233 ),
234 this
235 );
236 Finder->addMatcher(
237 callExpr(
238 callee( functionDecl( hasName( "ngettext" ) ) ),
239 hasImmediateArgument( 0, stringLiteralArgumentBound ),
240 hasImmediateArgument( 1, stringLiteralArgumentUnbound )
241 ),
242 this
243 );
244 Finder->addMatcher(
245 callExpr(
246 callee( functionDecl( hasName( "to_translation" ) ) ),
247 argumentCountIs( 1 ),
248 hasImmediateArgument( 0, stringLiteralArgumentBound )
249 ),
250 this
251 );
252 Finder->addMatcher(
253 callExpr(
254 callee( functionDecl( hasName( "pl_translation" ) ) ),
255 argumentCountIs( 2 ),
256 hasImmediateArgument( 0, stringLiteralArgumentBound ),
257 hasImmediateArgument( 1, stringLiteralArgumentUnbound )
258 ),
259 this
260 );
261 Finder->addMatcher(
262 callExpr(
263 callee( functionDecl( hasAnyName( "pgettext" ) ) ),
264 hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
265 hasImmediateArgument( 1, stringLiteralArgumentBound )
266 ),
267 this
268 );
269 Finder->addMatcher(
270 callExpr(
271 callee( functionDecl( hasAnyName( "npgettext" ) ) ),
272 hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
273 hasImmediateArgument( 1, stringLiteralArgumentBound ),
274 hasImmediateArgument( 2, stringLiteralArgumentUnbound )
275 ),
276 this
277 );
278 Finder->addMatcher(
279 callExpr(
280 callee( functionDecl( hasName( "to_translation" ) ) ),
281 argumentCountIs( 2 ),
282 hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
283 hasImmediateArgument( 1, stringLiteralArgumentBound )
284 ),
285 this
286 );
287 Finder->addMatcher(
288 callExpr(
289 callee( functionDecl( hasName( "pl_translation" ) ) ),
290 argumentCountIs( 3 ),
291 hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
292 hasImmediateArgument( 1, stringLiteralArgumentBound ),
293 hasImmediateArgument( 2, stringLiteralArgumentUnbound )
294 ),
295 this
296 );
297 Finder->addMatcher(
298 stringLiteral( isMarkedString( this ) ).bind( "RawText" ),
299 this
300 );
301 }
302
check(const MatchFinder::MatchResult & Result)303 void TranslatorCommentsCheck::check( const MatchFinder::MatchResult &Result )
304 {
305 MatchingStarted = true;
306
307 const StringLiteral *RawText = Result.Nodes.getNodeAs<StringLiteral>( "RawText" );
308 if( !RawText ) {
309 return;
310 }
311
312 const SourceManager &SM = *Result.SourceManager;
313 SourceLocation BegLoc = SM.getFileLoc( RawText->getBeginLoc() );
314 FileID File = SM.getFileID( BegLoc );
315 unsigned int BegLine = SM.getSpellingLineNumber( BegLoc );
316 unsigned int BegCol = SM.getSpellingColumnNumber( BegLoc );
317
318 auto it = Handler->TranslatorComments.lower_bound( { File, BegLine, BegCol } );
319 // Strictly speaking, a translator comment preceding a raw string with only
320 // blank lines in between will also be extracted, but we report it as an
321 // error here for simplicity.
322 while( it != Handler->TranslatorComments.begin() && std::prev( it )->first.File == File &&
323 std::prev( it )->first.EndLine + 1 >= BegLine ) {
324 it = std::prev( it );
325 // TODO: for the following code,
326 //
327 // /*<marker> foo*/ to_translation( "bar" );
328 // _( "baz" );
329 //
330 // The current logic will mark the comment when matching _() in addition
331 // to to_translation(), while xgettext will only match the comment with
332 // to_translation(). However the logic currently does not concern the
333 // content of the extracted string, so this doens't affect the results
334 // for now.
335 it->second.Checked = true;
336 BegLine = it->second.BegLine;
337 }
338 }
339
onEndOfTranslationUnit()340 void TranslatorCommentsCheck::onEndOfTranslationUnit()
341 {
342 // Report all translator comments without a matching string, after the end of AST iteration
343 for( const auto &elem : Handler->TranslatorComments ) {
344 if( !elem.second.Checked ) {
345 diag( elem.second.Beg, "Translator comment without a matching raw string" );
346 }
347 }
348 ClangTidyCheck::onEndOfTranslationUnit();
349 }
350
351 } // namespace cata
352 } // namespace tidy
353 } // namespace clang
354