1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 
13 namespace clang {
14 namespace tidy {
15 namespace utils {
16 namespace lexer {
17 
getPreviousToken(SourceLocation Location,const SourceManager & SM,const LangOptions & LangOpts,bool SkipComments)18 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
19                        const LangOptions &LangOpts, bool SkipComments) {
20   Token Token;
21   Token.setKind(tok::unknown);
22 
23   Location = Location.getLocWithOffset(-1);
24   if (Location.isInvalid())
25       return Token;
26 
27   auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28   while (Location != StartOfFile) {
29     Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30     if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31         (!SkipComments || !Token.is(tok::comment))) {
32       break;
33     }
34     Location = Location.getLocWithOffset(-1);
35   }
36   return Token;
37 }
38 
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)39 SourceLocation findPreviousTokenStart(SourceLocation Start,
40                                       const SourceManager &SM,
41                                       const LangOptions &LangOpts) {
42   if (Start.isInvalid() || Start.isMacroID())
43     return SourceLocation();
44 
45   SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46   if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47     return SourceLocation();
48 
49   return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50 }
51 
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)52 SourceLocation findPreviousTokenKind(SourceLocation Start,
53                                      const SourceManager &SM,
54                                      const LangOptions &LangOpts,
55                                      tok::TokenKind TK) {
56   if (Start.isInvalid() || Start.isMacroID())
57     return SourceLocation();
58 
59   while (true) {
60     SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61     if (L.isInvalid() || L.isMacroID())
62       return SourceLocation();
63 
64     Token T;
65     if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66       return SourceLocation();
67 
68     if (T.is(TK))
69       return T.getLocation();
70 
71     Start = L;
72   }
73 }
74 
findNextTerminator(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)75 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76                                   const LangOptions &LangOpts) {
77   return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78 }
79 
findNextTokenSkippingComments(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)80 Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
81                                               const SourceManager &SM,
82                                               const LangOptions &LangOpts) {
83   Optional<Token> CurrentToken;
84   do {
85     CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
86   } while (CurrentToken && CurrentToken->is(tok::comment));
87   return CurrentToken;
88 }
89 
rangeContainsExpansionsOrDirectives(SourceRange Range,const SourceManager & SM,const LangOptions & LangOpts)90 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
91                                          const SourceManager &SM,
92                                          const LangOptions &LangOpts) {
93   assert(Range.isValid() && "Invalid Range for relexing provided");
94   SourceLocation Loc = Range.getBegin();
95 
96   while (Loc < Range.getEnd()) {
97     if (Loc.isMacroID())
98       return true;
99 
100     llvm::Optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
101 
102     if (!Tok)
103       return true;
104 
105     if (Tok->is(tok::hash))
106       return true;
107 
108     Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts).getLocWithOffset(1);
109   }
110 
111   return false;
112 }
113 
getQualifyingToken(tok::TokenKind TK,CharSourceRange Range,const ASTContext & Context,const SourceManager & SM)114 llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
115                                          CharSourceRange Range,
116                                          const ASTContext &Context,
117                                          const SourceManager &SM) {
118   assert((TK == tok::kw_const || TK == tok::kw_volatile ||
119           TK == tok::kw_restrict) &&
120          "TK is not a qualifier keyword");
121   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
122   StringRef File = SM.getBufferData(LocInfo.first);
123   Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
124                  File.begin(), File.data() + LocInfo.second, File.end());
125   llvm::Optional<Token> LastMatchBeforeTemplate;
126   llvm::Optional<Token> LastMatchAfterTemplate;
127   bool SawTemplate = false;
128   Token Tok;
129   while (!RawLexer.LexFromRawLexer(Tok) &&
130          Range.getEnd() != Tok.getLocation() &&
131          !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
132     if (Tok.is(tok::raw_identifier)) {
133       IdentifierInfo &Info = Context.Idents.get(
134           StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
135       Tok.setIdentifierInfo(&Info);
136       Tok.setKind(Info.getTokenID());
137     }
138     if (Tok.is(tok::less))
139       SawTemplate = true;
140     else if (Tok.isOneOf(tok::greater, tok::greatergreater))
141       LastMatchAfterTemplate = None;
142     else if (Tok.is(TK)) {
143       if (SawTemplate)
144         LastMatchAfterTemplate = Tok;
145       else
146         LastMatchBeforeTemplate = Tok;
147     }
148   }
149   return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
150                                         : LastMatchBeforeTemplate;
151 }
152 
breakAndReturnEnd(const Stmt & S)153 static bool breakAndReturnEnd(const Stmt &S) {
154   return isa<CompoundStmt, DeclStmt, NullStmt>(S);
155 }
156 
breakAndReturnEndPlus1Token(const Stmt & S)157 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
158   return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
159 }
160 
161 // Given a Stmt which does not include it's semicolon this method returns the
162 // SourceLocation of the semicolon.
getSemicolonAfterStmtEndLoc(const SourceLocation & EndLoc,const SourceManager & SM,const LangOptions & LangOpts)163 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
164                                                   const SourceManager &SM,
165                                                   const LangOptions &LangOpts) {
166 
167   if (EndLoc.isMacroID()) {
168     // Assuming EndLoc points to a function call foo within macro F.
169     // This method is supposed to return location of the semicolon within
170     // those macro arguments:
171     //  F     (      foo()               ;   )
172     //  ^ EndLoc         ^ SpellingLoc   ^ next token of SpellingLoc
173     const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
174     Optional<Token> NextTok =
175         findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
176 
177     // Was the next token found successfully?
178     // All macro issues are simply resolved by ensuring it's a semicolon.
179     if (NextTok && NextTok->is(tok::TokenKind::semi)) {
180       // Ideally this would return `F` with spelling location `;` (NextTok)
181       // following the examle above. For now simply return NextTok location.
182       return NextTok->getLocation();
183     }
184 
185     // Fallthrough to 'normal handling'.
186     //  F     (      foo()              ) ;
187     //  ^ EndLoc         ^ SpellingLoc  ) ^ next token of EndLoc
188   }
189 
190   Optional<Token> NextTok = findNextTokenSkippingComments(EndLoc, SM, LangOpts);
191 
192   // Testing for semicolon again avoids some issues with macros.
193   if (NextTok && NextTok->is(tok::TokenKind::semi))
194     return NextTok->getLocation();
195 
196   return SourceLocation();
197 }
198 
getUnifiedEndLoc(const Stmt & S,const SourceManager & SM,const LangOptions & LangOpts)199 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
200                                 const LangOptions &LangOpts) {
201 
202   const Stmt *LastChild = &S;
203   while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
204          !breakAndReturnEndPlus1Token(*LastChild)) {
205     for (const Stmt *Child : LastChild->children())
206       LastChild = Child;
207   }
208 
209   if (!breakAndReturnEnd(*LastChild) &&
210       breakAndReturnEndPlus1Token(*LastChild))
211     return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
212 
213   return S.getEndLoc();
214 }
215 
216 } // namespace lexer
217 } // namespace utils
218 } // namespace tidy
219 } // namespace clang
220