1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/MacroArgs.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/ModuleLoader.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/PreprocessorOptions.h"
26 #include "gmock/gmock.h"
27 #include "gtest/gtest.h"
28 #include <memory>
29 #include <vector>
30
31 namespace {
32 using namespace clang;
33 using testing::ElementsAre;
34
35 // The test fixture.
36 class LexerTest : public ::testing::Test {
37 protected:
LexerTest()38 LexerTest()
39 : FileMgr(FileMgrOpts),
40 DiagID(new DiagnosticIDs()),
41 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
42 SourceMgr(Diags, FileMgr),
43 TargetOpts(new TargetOptions)
44 {
45 TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
46 Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
47 }
48
CreatePP(StringRef Source,TrivialModuleLoader & ModLoader)49 std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
50 TrivialModuleLoader &ModLoader) {
51 std::unique_ptr<llvm::MemoryBuffer> Buf =
52 llvm::MemoryBuffer::getMemBuffer(Source);
53 SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
54
55 HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
56 Diags, LangOpts, Target.get());
57 std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
58 std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
59 HeaderInfo, ModLoader,
60 /*IILookup =*/nullptr,
61 /*OwnsHeaderSearch =*/false);
62 PP->Initialize(*Target);
63 PP->EnterMainSourceFile();
64 return PP;
65 }
66
Lex(StringRef Source)67 std::vector<Token> Lex(StringRef Source) {
68 TrivialModuleLoader ModLoader;
69 PP = CreatePP(Source, ModLoader);
70
71 std::vector<Token> toks;
72 while (1) {
73 Token tok;
74 PP->Lex(tok);
75 if (tok.is(tok::eof))
76 break;
77 toks.push_back(tok);
78 }
79
80 return toks;
81 }
82
CheckLex(StringRef Source,ArrayRef<tok::TokenKind> ExpectedTokens)83 std::vector<Token> CheckLex(StringRef Source,
84 ArrayRef<tok::TokenKind> ExpectedTokens) {
85 auto toks = Lex(Source);
86 EXPECT_EQ(ExpectedTokens.size(), toks.size());
87 for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
88 EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
89 }
90
91 return toks;
92 }
93
getSourceText(Token Begin,Token End)94 std::string getSourceText(Token Begin, Token End) {
95 bool Invalid;
96 StringRef Str =
97 Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
98 Begin.getLocation(), End.getLocation())),
99 SourceMgr, LangOpts, &Invalid);
100 if (Invalid)
101 return "<INVALID>";
102 return std::string(Str);
103 }
104
105 FileSystemOptions FileMgrOpts;
106 FileManager FileMgr;
107 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
108 DiagnosticsEngine Diags;
109 SourceManager SourceMgr;
110 LangOptions LangOpts;
111 std::shared_ptr<TargetOptions> TargetOpts;
112 IntrusiveRefCntPtr<TargetInfo> Target;
113 std::unique_ptr<Preprocessor> PP;
114 };
115
// Lexes `M(f(M(i)))` and checks that the source text recovered for the `i`
// token (index 2) is the whole inner call `M(i)`.
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
}
129
// Lexes `M(M(i) c)` and checks that the source text recovered for the first
// token is the inner call `M(i)`.
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {tok::identifier,
                                                      tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
}
141
// Lexes `M(c c M(i))` and checks that the range from the second `c` to the
// `i` token maps back to the source text `c M(i)`.
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c c M(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
}
154
// Lexes `M(M(i) c c)` and checks that the range from the `i` token to the
// first `c` maps back to the source text `M(i) c`.
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
}
167
// Lexes two separate top-level calls of M and checks that a token range that
// starts in the first call and ends in the second is reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c M(i)) M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
}
181
// Lexes `M(f(C(i)))`, where C pastes its argument with `c`, and checks that
// the source text recovered for the pasted identifier (index 2) is `C(i)`.
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) M(x##c)\n"
                                     "M(f(C(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
}
196
// Lexes `f(M(M(i)))` and checks that the source text recovered for the `i`
// token (index 2) spans both nested calls: `M(M(i))`.
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "f(M(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
}
209
// Lexes `M(f(i))` and checks that the source text recovered for the `i` token
// (index 2), which sits in the middle of the macro argument, is just `i`.
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
}
222
// Lexes `f(C(M(i)))`, with C and M both forwarding their argument, and checks
// that the source text recovered for the `i` token (index 2) is `C(M(i))`.
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
}
236
// Lexes `f(C(M(i)))` where C emits `c` before its argument, and checks that
// the source text recovered for the `i` token (index 3) stops at `M(i)`
// instead of growing to the enclosing C call.
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
251
// Lexes `C(f(M(i)))` where C expands to `c M(x)`, and checks that the source
// text recovered for the `i` token (index 3) is `M(i)`.
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c M(x)\n"
                                     "C(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
266
// Exercises several static Lexer queries on macro-expanded tokens:
// isAtStartOfMacroExpansion / isAtEndOfMacroExpansion, makeFileCharRange,
// getSourceText and getImmediateMacroName.
TEST_F(LexerTest, LexAPI) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::l_square, tok::identifier, tok::r_square,
      tok::l_square, tok::identifier, tok::r_square,
      // Line 2
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
                                     "#define N(x) x\n"
                                     "#define INN(x) x\n"
                                     "#define NOF1 INN(val)\n"
                                     "#define NOF2 val\n"
                                     "M(foo) N([bar])\n"
                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  // Tokens 0-2 come from `M(foo)`: the brackets are spelled in M's body and
  // the identifier is M's argument.
  SourceLocation lsqrLoc = toks[0].getLocation();
  SourceLocation idLoc = toks[1].getLocation();
  SourceLocation rsqrLoc = toks[2].getLocation();
  CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);

  SourceLocation Loc;
  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getBegin());
  EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  EXPECT_EQ(Loc, macroRange.getEnd());
  EXPECT_TRUE(macroRange.isTokenRange());

  // A range covering only part of the expansion has no file range; the full
  // expansion maps to the `M(foo)` call as a char range.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(idLoc, rsqrLoc), SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), SourceMgr, LangOpts);
  EXPECT_FALSE(range.isTokenRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)));

  StringRef text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), SourceMgr, LangOpts);
  EXPECT_EQ(text, "M(foo)");

  // Tokens 3-5 come from `N([bar])`: all three are spelled inside the macro
  // argument.
  SourceLocation macroLsqrLoc = toks[3].getLocation();
  SourceLocation macroIdLoc = toks[4].getLocation();
  SourceLocation macroRsqrLoc = toks[5].getLocation();
  SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);

  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), SourceMgr,
      LangOpts);
  EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
            range.getAsRange());

  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), SourceMgr,
      LangOpts);
  EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
            range.getAsRange());

  macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), SourceMgr,
      LangOpts);
  EXPECT_EQ(SourceRange(macroRange.getBegin(),
                        macroRange.getEnd().getLocWithOffset(1)),
            range.getAsRange());

  text = Lexer::getSourceText(
      CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
      SourceMgr, LangOpts);
  EXPECT_EQ(text, "[bar");

  // Tokens 6-9 are the four `val` identifiers produced by the second line.
  SourceLocation idLoc1 = toks[6].getLocation();
  SourceLocation idLoc2 = toks[7].getLocation();
  SourceLocation idLoc3 = toks[8].getLocation();
  SourceLocation idLoc4 = toks[9].getLocation();
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
363
// Checks Lexer::makeFileCharRange on ranges that end at a location created by
// Preprocessor::SplitToken on a `>>` token, both for a `>>` written directly
// in the file and for one that comes from a macro body.
TEST_F(LexerTest, HandlesSplitTokens) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      // Line 1 (after the #defines)
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater,
      // Line 2
      tok::identifier, tok::less, tok::identifier, tok::less,
      tok::greatergreater};

  std::vector<Token> toks = CheckLex("#define TY ty\n"
                                     "#define RANGLE ty<ty<>>\n"
                                     "TY<ty<>>\n"
                                     "RANGLE",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of the token list.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  SourceLocation outerTyLoc = toks[0].getLocation();
  SourceLocation innerTyLoc = toks[2].getLocation();
  SourceLocation gtgtLoc = toks[4].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);

  // Verify that it only captures the first greater-than and not the second one.
  CharSourceRange range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));

  // Verify case where range begins in a macro expansion.
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
      LangOpts);
  EXPECT_TRUE(range.isCharRange());
  EXPECT_EQ(range.getAsRange(),
            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
                        gtgtLoc.getLocWithOffset(1)));

  SourceLocation macroInnerTyLoc = toks[7].getLocation();
  SourceLocation macroGtgtLoc = toks[9].getLocation();
  // Split the token to simulate the action of the parser and force creation of
  // an `ExpansionTokenRange`.
  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);

  // Verify that it fails (because it only captures the first greater-than and
  // not the second one, so it doesn't span the entire macro expansion).
  range = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
      SourceMgr, LangOpts);
  EXPECT_TRUE(range.isInvalid());
}
422
// Checks the size of the FileID that holds the `helper1` macro argument when
// variadic macros forward arguments to one another.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");
  // Fail cleanly instead of reading out of bounds if lexing produced fewer
  // tokens than the index used below.
  ASSERT_GT(toks.size(), 20u);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
439
// Builds a MacroArgs object by hand for a three-parameter function-like macro
// and checks the result of stringifying each of its three arguments. In
// asserts-enabled builds with death-test support, also checks that asking for
// a fourth argument dies with "Invalid arg #".
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Only the number of parameters matters here; value-initialize the array so
  // that null pointers, not indeterminate ones, are copied into the MacroInfo.
  std::array<IdentifierInfo *, 3> ParamList{};
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Separator token placed after every argument. Reset all fields before
  // setting the kind so that copies of it never read uninitialized data.
  Token Eof;
  Eof.startToken();
  Eof.setKind(tok::eof);

  // Lex the whole buffer, replacing each comma with an eof token and
  // appending one final eof token at the end of input.
  std::vector<Token> ArgTokens;
  while (1) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs objects are released through destroy(), not delete.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false, {}, {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
488
// Checks Lexer::isNewLineEscaped against a set of short buffers, querying the
// last character of each buffer.
TEST_F(LexerTest, IsNewLineEscapedValid) {
  // Passes the start of Text as the buffer start and a pointer to its final
  // character as the position to test.
  auto endsWithEscapedNewLine = [](const char *Text) {
    const char *LastChar = Text + strlen(Text) - 1;
    return Lexer::isNewLineEscaped(Text, LastChar);
  };

  // Buffers expected to be accepted.
  EXPECT_TRUE(endsWithEscapedNewLine("\\\r"));
  EXPECT_TRUE(endsWithEscapedNewLine("\\\n"));
  EXPECT_TRUE(endsWithEscapedNewLine("\\\r\n"));
  EXPECT_TRUE(endsWithEscapedNewLine("\\\n\r"));
  EXPECT_TRUE(endsWithEscapedNewLine("\\ \t\v\f\r"));
  EXPECT_TRUE(endsWithEscapedNewLine("\\ \t\v\f\r\n"));

  // Buffers expected to be rejected.
  EXPECT_FALSE(endsWithEscapedNewLine("\\\r\r"));
  EXPECT_FALSE(endsWithEscapedNewLine("\\\r\r\n"));
  EXPECT_FALSE(endsWithEscapedNewLine("\\\n\n"));
  EXPECT_FALSE(endsWithEscapedNewLine("\r"));
  EXPECT_FALSE(endsWithEscapedNewLine("\n"));
  EXPECT_FALSE(endsWithEscapedNewLine("\r\n"));
  EXPECT_FALSE(endsWithEscapedNewLine("\n\r"));
  EXPECT_FALSE(endsWithEscapedNewLine("\r\r"));
  EXPECT_FALSE(endsWithEscapedNewLine("\n\n"));
}
511
// For identifiers that contain escaped line breaks, checks that
// Lexer::GetBeginningOfToken maps every position inside the identifier back to
// the offset at which the lexer reported the token.
TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  // Every line below has the same length, which keeps the offset arithmetic
  // in the loop simple.
  const unsigned IdentifierLength = 8;
  std::string TextToLex = "rabarbar\n"
                          "foo\\\nbar\n"
                          "foo\\\rbar\n"
                          "fo\\\r\nbar\n"
                          "foo\\\n\rba\n";
  // Five identifier tokens are expected, one per line.
  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);

  for (const Token &Tok : LexedTokens) {
    const SourceLocation TokStart = Tok.getLocation();
    const unsigned ExpectedOffset = SourceMgr.getDecomposedLoc(TokStart).second;

    for (unsigned Delta = 0; Delta != IdentifierLength; ++Delta) {
      const SourceLocation Begin = Lexer::GetBeginningOfToken(
          TokStart.getLocWithOffset(Delta), SourceMgr, LangOpts);
      const unsigned FoundOffset =
          SourceMgr.getDecomposedExpansionLoc(Begin).second;

      // The location returned by GetBeginningOfToken must match the token
      // location originally reported by the lexer.
      EXPECT_EQ(FoundOffset, ExpectedOffset);
    }
  }
}
541
// Lexes three inputs that end in (or right after) backslashes and checks that
// each of them yields no tokens.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  const std::vector<Token> AfterLineComment = Lex("  // \\\n");
  EXPECT_TRUE(AfterLineComment.empty());

  const std::vector<Token> AfterUnterminatedInclude = Lex("#include <\\\\");
  EXPECT_TRUE(AfterUnterminatedInclude.empty());

  const std::vector<Token> AfterIncludeWithNewLine = Lex("#include <\\\\\n");
  EXPECT_TRUE(AfterIncludeWithNewLine.empty());
}
547
// Runs both Lexer::Stringify overloads (the one returning a std::string and
// the one rewriting a SmallVectorImpl<char> in place) over the same multi-line
// inputs and checks that they produce the same escaped text.
TEST_F(LexerTest, StringizingRasString) {
  // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  std::string String1 = R"(foo
 {"bar":[]}
 baz)";
  // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  SmallString<128> String2;
  String2 += String1.c_str();

  // Corner cases.
  // Lines that end in backslashes and lines containing a literal "\n".
  std::string String3 = R"(\
 \n
 \\n
 \\)";
  SmallString<128> String4;
  String4 += String3.c_str();
  // A trailing backslash followed by consecutive empty lines.
  std::string String5 = R"(a\


 \\b)";
  SmallString<128> String6;
  String6 += String5.c_str();

  // Odd-numbered strings go through the value-returning overload,
  // even-numbered ones through the in-place overload.
  String1 = Lexer::Stringify(StringRef(String1));
  Lexer::Stringify(String2);
  String3 = Lexer::Stringify(StringRef(String3));
  Lexer::Stringify(String4);
  String5 = Lexer::Stringify(StringRef(String5));
  Lexer::Stringify(String6);

  // In the expected text every line break appears as the two characters `\n`,
  // and every backslash and double quote is preceded by a backslash.
  EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
  EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
  EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
  EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
  EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
  EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
}
585
// Checks that converting a token range that covers a whole macro expansion
// into a char range still yields the full macro name as source text.
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Fail cleanly instead of reading out of bounds if lexing produced fewer
  // tokens than the index used below.
  ASSERT_GT(toks.size(), 5u);
  // Token 5 is the one produced by expanding MOO.
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
604
// Walks the main file with Lexer::findNextToken, starting from the beginning
// of the file, and checks the spelling of every token it returns before eof.
TEST_F(LexerTest, FindNextToken) {
  Lex("int abcd = 0;\n"
      "int xyz = abcd;\n");

  const FileID MainFile = SourceMgr.getMainFileID();
  SourceLocation Cursor = SourceMgr.getLocForStartOfFile(MainFile);

  std::vector<std::string> Spellings;
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts);
    ASSERT_TRUE(Next.hasValue());
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    // Continue the search from the token that was just found.
    Cursor = Next->getLocation();
  }

  // The leading `int` is not expected: the walk starts at its location and
  // the first result is the token that follows it.
  EXPECT_THAT(Spellings, ElementsAre("abcd", "=", "0", ";", "int", "xyz", "=",
                                     "abcd", ";"));
}
622
// Preprocesses an empty main file to the end and checks that the source
// manager reports exactly one FileID created for the predefines buffer.
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("", ModLoader);

  // Drain the preprocessor; the tokens themselves are not of interest.
  Token Tok;
  do {
    PP->Lex(Tok);
  } while (Tok.isNot(tok::eof));

  const FileID PredefinesFID = PP->getPredefinesFileID();
  EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PredefinesFID), 1U);
}
635 } // anonymous namespace
636