1 #include "TranslatorCommentsCheck.h"
2 
3 #include <ClangTidyDiagnosticConsumer.h>
4 #include <clang/AST/ASTContext.h>
5 #include <clang/AST/Expr.h>
6 #include <clang/ASTMatchers/ASTMatchers.h>
7 #include <clang/ASTMatchers/ASTMatchersInternal.h>
8 #include <clang/ASTMatchers/ASTMatchersMacros.h>
9 #include <clang/Basic/DiagnosticIDs.h>
10 #include <clang/Basic/IdentifierTable.h>
11 #include <clang/Basic/SourceManager.h>
12 #include <clang/Frontend/CompilerInstance.h>
13 #include <clang/Lex/Lexer.h>
14 #include <clang/Lex/MacroArgs.h>
15 #include <clang/Lex/PPCallbacks.h>
16 #include <clang/Lex/Preprocessor.h>
17 #include <clang/Lex/Token.h>
18 #include <llvm/ADT/STLExtras.h>
19 #include <llvm/Support/Regex.h>
20 #include <iterator>
21 #include <map>
22 #include <utility>
23 
24 #include "clang/Basic/TokenKinds.h"
25 
26 namespace clang
27 {
28 class CXXConstructExpr;
29 class MacroDefinition;
30 }  // namespace clang
31 
32 using namespace clang::ast_matchers;
33 
34 namespace clang
35 {
36 namespace ast_matchers
37 {
// Matches a call or construct expression whose N-th argument satisfies
// InnerMatcher once implicit wrapper nodes have been stripped with
// IgnoreImplicit(). Expressions that have N or fewer arguments never match.
AST_POLYMORPHIC_MATCHER_P2( hasImmediateArgument,
                            AST_POLYMORPHIC_SUPPORTED_TYPES( CallExpr, CXXConstructExpr ),
                            unsigned int, N, internal::Matcher<Expr>, InnerMatcher )
{
    // Guard against indexing past the argument list.
    if( N >= Node.getNumArgs() ) {
        return false;
    }
    const Expr *const Argument = Node.getArg( N )->IgnoreImplicit();
    return InnerMatcher.matches( *Argument, Finder, Builder );
}
45 
// Matches a string literal whose file location is present in
// Check->MarkedStrings. As a side effect, flags on the check that AST
// matching has started.
AST_MATCHER_P( StringLiteral, isMarkedString, tidy::cata::TranslatorCommentsCheck *, Check )
{
    // Builder is not needed by this matcher; reference it before returning so
    // the statement is actually reachable.
    static_cast<void>( Builder );

    Check->MatchingStarted = true;
    SourceManager &SM = Finder->getASTContext().getSourceManager();
    // Marked strings are looked up by file location, so map the literal's
    // location through getFileLoc() before searching.
    const SourceLocation Loc = SM.getFileLoc( Node.getBeginLoc() );
    return Check->MarkedStrings.find( Loc ) != Check->MarkedStrings.end();
}
54 } // namespace ast_matchers
55 namespace tidy
56 {
57 namespace cata
58 {
59 
60 class TranslatorCommentsCheck::TranslatorCommentsHandler : public CommentHandler
61 {
62     public:
TranslatorCommentsHandler(TranslatorCommentsCheck & Check)63         explicit TranslatorCommentsHandler( TranslatorCommentsCheck &Check ) : Check( Check ),
64             // xgettext will treat all comments containing the marker as
65             // translator comments, but we only match those starting with
66             // the marker to allow using the marker inside normal comments
67             Match( "^/[/*]~.*$" ) {}
68 
HandleComment(Preprocessor & PP,SourceRange Range)69         bool HandleComment( Preprocessor &PP, SourceRange Range ) override {
70             if( Check.MatchingStarted ) {
71                 // according to the standard, all comments are processed before analyzing the syntax
72                 Check.diag( Range.getBegin(), "AST Matching started before the end of comment preprocessing",
73                             DiagnosticIDs::Error );
74             }
75 
76             const SourceManager &SM = PP.getSourceManager();
77             StringRef Text = Lexer::getSourceText( CharSourceRange::getCharRange( Range ),
78                                                    SM, PP.getLangOpts() );
79 
80             if( !Match.match( Text ) ) {
81                 return false;
82             }
83 
84             SourceLocation BegLoc = SM.getFileLoc( Range.getBegin() );
85             SourceLocation EndLoc = SM.getFileLoc( Range.getEnd() );
86             FileID File = SM.getFileID( EndLoc );
87             unsigned int EndLine = SM.getSpellingLineNumber( EndLoc );
88             unsigned int EndCol = SM.getSpellingColumnNumber( EndLoc );
89 
90             if( File != SM.getFileID( BegLoc ) ) {
91                 Check.diag( BegLoc, "Mysterious multi-file comment", DiagnosticIDs::Error );
92                 return false;
93             }
94 
95             unsigned int BegLine = SM.getSpellingLineNumber( BegLoc );
96 
97             TranslatorComments.emplace( TranslatorCommentLocation { File, EndLine, EndCol },
98                                         TranslatorComment { BegLoc, BegLine, false } );
99             return false;
100         }
101 
102         struct TranslatorCommentLocation {
103             FileID File;
104             unsigned int EndLine;
105             unsigned int EndCol;
106 
operator ==clang::tidy::cata::TranslatorCommentsCheck::TranslatorCommentsHandler::TranslatorCommentLocation107             bool operator==( const TranslatorCommentLocation &Other ) const {
108                 return File == Other.File && EndLine == Other.EndLine && EndCol == Other.EndCol;
109             }
110 
operator <clang::tidy::cata::TranslatorCommentsCheck::TranslatorCommentsHandler::TranslatorCommentLocation111             bool operator<( const TranslatorCommentLocation &Other ) const {
112                 if( File != Other.File ) {
113                     return File < Other.File;
114                 }
115                 if( EndLine != Other.EndLine ) {
116                     return EndLine < Other.EndLine;
117                 }
118                 return EndCol < Other.EndCol;
119             }
120         };
121 
122         struct TranslatorComment {
123             SourceLocation Beg;
124             unsigned int BegLine;
125             bool Checked;
126         };
127 
128         std::map<TranslatorCommentLocation, TranslatorComment> TranslatorComments;
129 
130     private:
131         TranslatorCommentsCheck &Check;
132         llvm::Regex Match;
133 };
134 
135 class TranslatorCommentsCheck::TranslationMacroCallback : public PPCallbacks
136 {
137     public:
TranslationMacroCallback(TranslatorCommentsCheck & Check,const SourceManager & SM)138         TranslationMacroCallback( TranslatorCommentsCheck &Check, const SourceManager &SM )
139             : Check( Check ), SM( SM ) {}
140 
MacroExpands(const Token & MacroNameTok,const MacroDefinition &,SourceRange Range,const MacroArgs * Args)141         void MacroExpands( const Token &MacroNameTok,
142                            const MacroDefinition &,
143                            SourceRange Range,
144                            const MacroArgs *Args ) override {
145             if( Check.MatchingStarted ) {
146                 // according to the standard, all macros are expanded before analyzing the syntax
147                 Check.diag( Range.getBegin(), "AST Matching started before the end of macro expansion",
148                             DiagnosticIDs::Error );
149             }
150 
151             StringRef MacroName = MacroNameTok.getIdentifierInfo()->getName();
152 
153             bool is_marker;
154             unsigned int RawStringInd;
155             if( MacroName == "_" ) {
156                 is_marker = false;
157                 RawStringInd = 0;
158             } else if( MacroName == "translate_marker" ) {
159                 is_marker = true;
160                 RawStringInd = 0;
161             } else if( MacroName == "translate_marker_context" ) {
162                 is_marker = true;
163                 RawStringInd = 1;
164             } else {
165                 return;
166             }
167 
168             if( RawStringInd >= Args->getNumMacroArguments() ) {
169                 Check.diag( Range.getBegin(), "Translation marker doesn't have expected number of arguments",
170                             DiagnosticIDs::Error );
171             }
172 
173             // First ensure that translation markers have only string literal arguments
174             for( unsigned int i = 0; i < Args->getNumMacroArguments(); i++ ) {
175                 const Token *Tok = Args->getUnexpArgument( i );
176                 if( Tok->is( tok::eof ) ) {
177                     Check.diag( Tok->getLocation(), "Empty argument to a translation marker macro" );
178                     return;
179                 }
180                 for( ; Tok->isNot( tok::eof ); ++Tok ) {
181                     if( !tok::isStringLiteral( Tok->getKind() ) ) {
182                         if( is_marker ) {
183                             Check.diag( Tok->getLocation(), "Translation marker macros only accepts string literal arguments" );
184                         }
185                         return;
186                     }
187                 }
188             }
189 
190             const Token *Tok = Args->getUnexpArgument( RawStringInd );
191             Check.MarkedStrings.emplace( SM.getFileLoc( Tok->getLocation() ) );
192         }
193 
194     private:
195         TranslatorCommentsCheck &Check;
196         const SourceManager &SM;
197 };
198 
TranslatorCommentsCheck(StringRef Name,ClangTidyContext * Context)199 TranslatorCommentsCheck::TranslatorCommentsCheck( StringRef Name, ClangTidyContext *Context )
200     : ClangTidyCheck( Name, Context ),
201       MatchingStarted( false ),
202       Handler( llvm::make_unique<TranslatorCommentsHandler>( *this ) ) {}
203 
registerPPCallbacks(CompilerInstance & Compiler)204 void TranslatorCommentsCheck::registerPPCallbacks( CompilerInstance &Compiler )
205 {
206     Compiler.getPreprocessor().addCommentHandler( Handler.get() );
207     Compiler.getPreprocessor().addPPCallbacks(
208         llvm::make_unique<TranslationMacroCallback>( *this, Compiler.getSourceManager() ) );
209 }
210 
registerMatchers(MatchFinder * Finder)211 void TranslatorCommentsCheck::registerMatchers( MatchFinder *Finder )
212 {
213     const auto stringLiteralArgumentBound =
214         anyOf(
215             stringLiteral().bind( "RawText" ),
216             cxxConstructExpr(
217                 unless( isListInitialization() ),
218                 hasImmediateArgument( 0, stringLiteral().bind( "RawText" ) )
219             )
220         );
221     const auto stringLiteralArgumentUnbound =
222         anyOf(
223             stringLiteral(),
224             cxxConstructExpr(
225                 unless( isListInitialization() ),
226                 hasImmediateArgument( 0, stringLiteral() )
227             )
228         );
229     Finder->addMatcher(
230         callExpr(
231             callee( functionDecl( hasAnyName( "_", "gettext" ) ) ),
232             hasImmediateArgument( 0, stringLiteralArgumentBound )
233         ),
234         this
235     );
236     Finder->addMatcher(
237         callExpr(
238             callee( functionDecl( hasName( "ngettext" ) ) ),
239             hasImmediateArgument( 0, stringLiteralArgumentBound ),
240             hasImmediateArgument( 1, stringLiteralArgumentUnbound )
241         ),
242         this
243     );
244     Finder->addMatcher(
245         callExpr(
246             callee( functionDecl( hasName( "to_translation" ) ) ),
247             argumentCountIs( 1 ),
248             hasImmediateArgument( 0, stringLiteralArgumentBound )
249         ),
250         this
251     );
252     Finder->addMatcher(
253         callExpr(
254             callee( functionDecl( hasName( "pl_translation" ) ) ),
255             argumentCountIs( 2 ),
256             hasImmediateArgument( 0, stringLiteralArgumentBound ),
257             hasImmediateArgument( 1, stringLiteralArgumentUnbound )
258         ),
259         this
260     );
261     Finder->addMatcher(
262         callExpr(
263             callee( functionDecl( hasAnyName( "pgettext" ) ) ),
264             hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
265             hasImmediateArgument( 1, stringLiteralArgumentBound )
266         ),
267         this
268     );
269     Finder->addMatcher(
270         callExpr(
271             callee( functionDecl( hasAnyName( "npgettext" ) ) ),
272             hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
273             hasImmediateArgument( 1, stringLiteralArgumentBound ),
274             hasImmediateArgument( 2, stringLiteralArgumentUnbound )
275         ),
276         this
277     );
278     Finder->addMatcher(
279         callExpr(
280             callee( functionDecl( hasName( "to_translation" ) ) ),
281             argumentCountIs( 2 ),
282             hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
283             hasImmediateArgument( 1, stringLiteralArgumentBound )
284         ),
285         this
286     );
287     Finder->addMatcher(
288         callExpr(
289             callee( functionDecl( hasName( "pl_translation" ) ) ),
290             argumentCountIs( 3 ),
291             hasImmediateArgument( 0, stringLiteralArgumentUnbound ),
292             hasImmediateArgument( 1, stringLiteralArgumentBound ),
293             hasImmediateArgument( 2, stringLiteralArgumentUnbound )
294         ),
295         this
296     );
297     Finder->addMatcher(
298         stringLiteral( isMarkedString( this ) ).bind( "RawText" ),
299         this
300     );
301 }
302 
check(const MatchFinder::MatchResult & Result)303 void TranslatorCommentsCheck::check( const MatchFinder::MatchResult &Result )
304 {
305     MatchingStarted = true;
306 
307     const StringLiteral *RawText = Result.Nodes.getNodeAs<StringLiteral>( "RawText" );
308     if( !RawText ) {
309         return;
310     }
311 
312     const SourceManager &SM = *Result.SourceManager;
313     SourceLocation BegLoc = SM.getFileLoc( RawText->getBeginLoc() );
314     FileID File = SM.getFileID( BegLoc );
315     unsigned int BegLine = SM.getSpellingLineNumber( BegLoc );
316     unsigned int BegCol = SM.getSpellingColumnNumber( BegLoc );
317 
318     auto it = Handler->TranslatorComments.lower_bound( { File, BegLine, BegCol } );
319     // Strictly speaking, a translator comment preceding a raw string with only
320     // blank lines in between will also be extracted, but we report it as an
321     // error here for simplicity.
322     while( it != Handler->TranslatorComments.begin() && std::prev( it )->first.File == File &&
323            std::prev( it )->first.EndLine + 1 >= BegLine ) {
324         it = std::prev( it );
325         // TODO: for the following code,
326         //
327         // /*<marker> foo*/ to_translation( "bar" );
328         // _( "baz" );
329         //
330         // The current logic will mark the comment when matching _() in addition
331         // to to_translation(), while xgettext will only match the comment with
332         // to_translation(). However the logic currently does not concern the
333         // content of the extracted string, so this doens't affect the results
334         // for now.
335         it->second.Checked = true;
336         BegLine = it->second.BegLine;
337     }
338 }
339 
onEndOfTranslationUnit()340 void TranslatorCommentsCheck::onEndOfTranslationUnit()
341 {
342     // Report all translator comments without a matching string, after the end of AST iteration
343     for( const auto &elem : Handler->TranslatorComments ) {
344         if( !elem.second.Checked ) {
345             diag( elem.second.Beg, "Translator comment without a matching raw string" );
346         }
347     }
348     ClangTidyCheck::onEndOfTranslationUnit();
349 }
350 
351 } // namespace cata
352 } // namespace tidy
353 } // namespace clang
354