1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12
13 namespace clang {
14 namespace tidy {
15 namespace utils {
16 namespace lexer {
17
getPreviousToken(SourceLocation Location,const SourceManager & SM,const LangOptions & LangOpts,bool SkipComments)18 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
19 const LangOptions &LangOpts, bool SkipComments) {
20 Token Token;
21 Token.setKind(tok::unknown);
22
23 Location = Location.getLocWithOffset(-1);
24 if (Location.isInvalid())
25 return Token;
26
27 auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28 while (Location != StartOfFile) {
29 Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30 if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31 (!SkipComments || !Token.is(tok::comment))) {
32 break;
33 }
34 Location = Location.getLocWithOffset(-1);
35 }
36 return Token;
37 }
38
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)39 SourceLocation findPreviousTokenStart(SourceLocation Start,
40 const SourceManager &SM,
41 const LangOptions &LangOpts) {
42 if (Start.isInvalid() || Start.isMacroID())
43 return SourceLocation();
44
45 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47 return SourceLocation();
48
49 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50 }
51
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)52 SourceLocation findPreviousTokenKind(SourceLocation Start,
53 const SourceManager &SM,
54 const LangOptions &LangOpts,
55 tok::TokenKind TK) {
56 if (Start.isInvalid() || Start.isMacroID())
57 return SourceLocation();
58
59 while (true) {
60 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61 if (L.isInvalid() || L.isMacroID())
62 return SourceLocation();
63
64 Token T;
65 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66 return SourceLocation();
67
68 if (T.is(TK))
69 return T.getLocation();
70
71 Start = L;
72 }
73 }
74
findNextTerminator(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)75 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76 const LangOptions &LangOpts) {
77 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78 }
79
findNextTokenSkippingComments(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)80 Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
81 const SourceManager &SM,
82 const LangOptions &LangOpts) {
83 Optional<Token> CurrentToken;
84 do {
85 CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
86 } while (CurrentToken && CurrentToken->is(tok::comment));
87 return CurrentToken;
88 }
89
rangeContainsExpansionsOrDirectives(SourceRange Range,const SourceManager & SM,const LangOptions & LangOpts)90 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
91 const SourceManager &SM,
92 const LangOptions &LangOpts) {
93 assert(Range.isValid() && "Invalid Range for relexing provided");
94 SourceLocation Loc = Range.getBegin();
95
96 while (Loc < Range.getEnd()) {
97 if (Loc.isMacroID())
98 return true;
99
100 llvm::Optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
101
102 if (!Tok)
103 return true;
104
105 if (Tok->is(tok::hash))
106 return true;
107
108 Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts).getLocWithOffset(1);
109 }
110
111 return false;
112 }
113
getQualifyingToken(tok::TokenKind TK,CharSourceRange Range,const ASTContext & Context,const SourceManager & SM)114 llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
115 CharSourceRange Range,
116 const ASTContext &Context,
117 const SourceManager &SM) {
118 assert((TK == tok::kw_const || TK == tok::kw_volatile ||
119 TK == tok::kw_restrict) &&
120 "TK is not a qualifier keyword");
121 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
122 StringRef File = SM.getBufferData(LocInfo.first);
123 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
124 File.begin(), File.data() + LocInfo.second, File.end());
125 llvm::Optional<Token> LastMatchBeforeTemplate;
126 llvm::Optional<Token> LastMatchAfterTemplate;
127 bool SawTemplate = false;
128 Token Tok;
129 while (!RawLexer.LexFromRawLexer(Tok) &&
130 Range.getEnd() != Tok.getLocation() &&
131 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
132 if (Tok.is(tok::raw_identifier)) {
133 IdentifierInfo &Info = Context.Idents.get(
134 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
135 Tok.setIdentifierInfo(&Info);
136 Tok.setKind(Info.getTokenID());
137 }
138 if (Tok.is(tok::less))
139 SawTemplate = true;
140 else if (Tok.isOneOf(tok::greater, tok::greatergreater))
141 LastMatchAfterTemplate = None;
142 else if (Tok.is(TK)) {
143 if (SawTemplate)
144 LastMatchAfterTemplate = Tok;
145 else
146 LastMatchBeforeTemplate = Tok;
147 }
148 }
149 return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
150 : LastMatchBeforeTemplate;
151 }
152
breakAndReturnEnd(const Stmt & S)153 static bool breakAndReturnEnd(const Stmt &S) {
154 return isa<CompoundStmt, DeclStmt, NullStmt>(S);
155 }
156
breakAndReturnEndPlus1Token(const Stmt & S)157 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
158 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, SEHLeaveStmt>(S);
159 }
160
161 // Given a Stmt which does not include it's semicolon this method returns the
162 // SourceLocation of the semicolon.
getSemicolonAfterStmtEndLoc(const SourceLocation & EndLoc,const SourceManager & SM,const LangOptions & LangOpts)163 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
164 const SourceManager &SM,
165 const LangOptions &LangOpts) {
166
167 if (EndLoc.isMacroID()) {
168 // Assuming EndLoc points to a function call foo within macro F.
169 // This method is supposed to return location of the semicolon within
170 // those macro arguments:
171 // F ( foo() ; )
172 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc
173 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
174 Optional<Token> NextTok =
175 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
176
177 // Was the next token found successfully?
178 // All macro issues are simply resolved by ensuring it's a semicolon.
179 if (NextTok && NextTok->is(tok::TokenKind::semi)) {
180 // Ideally this would return `F` with spelling location `;` (NextTok)
181 // following the examle above. For now simply return NextTok location.
182 return NextTok->getLocation();
183 }
184
185 // Fallthrough to 'normal handling'.
186 // F ( foo() ) ;
187 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc
188 }
189
190 Optional<Token> NextTok = findNextTokenSkippingComments(EndLoc, SM, LangOpts);
191
192 // Testing for semicolon again avoids some issues with macros.
193 if (NextTok && NextTok->is(tok::TokenKind::semi))
194 return NextTok->getLocation();
195
196 return SourceLocation();
197 }
198
getUnifiedEndLoc(const Stmt & S,const SourceManager & SM,const LangOptions & LangOpts)199 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
200 const LangOptions &LangOpts) {
201
202 const Stmt *LastChild = &S;
203 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
204 !breakAndReturnEndPlus1Token(*LastChild)) {
205 for (const Stmt *Child : LastChild->children())
206 LastChild = Child;
207 }
208
209 if (!breakAndReturnEnd(*LastChild) &&
210 breakAndReturnEndPlus1Token(*LastChild))
211 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
212
213 return S.getEndLoc();
214 }
215
216 } // namespace lexer
217 } // namespace utils
218 } // namespace tidy
219 } // namespace clang
220