1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Lex/Lexer.h"
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/DiagnosticOptions.h"
12 #include "clang/Basic/FileManager.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/HeaderSearch.h"
20 #include "clang/Lex/HeaderSearchOptions.h"
21 #include "clang/Lex/MacroArgs.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/ModuleLoader.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/PreprocessorOptions.h"
26 #include "gmock/gmock.h"
27 #include "gtest/gtest.h"
28 #include <memory>
29 #include <vector>
30 
31 namespace {
32 using namespace clang;
33 using testing::ElementsAre;
34 
35 // The test fixture.
36 class LexerTest : public ::testing::Test {
37 protected:
LexerTest()38   LexerTest()
39     : FileMgr(FileMgrOpts),
40       DiagID(new DiagnosticIDs()),
41       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
42       SourceMgr(Diags, FileMgr),
43       TargetOpts(new TargetOptions)
44   {
45     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
46     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
47   }
48 
CreatePP(StringRef Source,TrivialModuleLoader & ModLoader)49   std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
50                                          TrivialModuleLoader &ModLoader) {
51     std::unique_ptr<llvm::MemoryBuffer> Buf =
52         llvm::MemoryBuffer::getMemBuffer(Source);
53     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
54 
55     HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
56                             Diags, LangOpts, Target.get());
57     std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
58         std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
59         HeaderInfo, ModLoader,
60         /*IILookup =*/nullptr,
61         /*OwnsHeaderSearch =*/false);
62     PP->Initialize(*Target);
63     PP->EnterMainSourceFile();
64     return PP;
65   }
66 
Lex(StringRef Source)67   std::vector<Token> Lex(StringRef Source) {
68     TrivialModuleLoader ModLoader;
69     PP = CreatePP(Source, ModLoader);
70 
71     std::vector<Token> toks;
72     while (1) {
73       Token tok;
74       PP->Lex(tok);
75       if (tok.is(tok::eof))
76         break;
77       toks.push_back(tok);
78     }
79 
80     return toks;
81   }
82 
CheckLex(StringRef Source,ArrayRef<tok::TokenKind> ExpectedTokens)83   std::vector<Token> CheckLex(StringRef Source,
84                               ArrayRef<tok::TokenKind> ExpectedTokens) {
85     auto toks = Lex(Source);
86     EXPECT_EQ(ExpectedTokens.size(), toks.size());
87     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
88       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
89     }
90 
91     return toks;
92   }
93 
getSourceText(Token Begin,Token End)94   std::string getSourceText(Token Begin, Token End) {
95     bool Invalid;
96     StringRef Str =
97         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
98                                     Begin.getLocation(), End.getLocation())),
99                              SourceMgr, LangOpts, &Invalid);
100     if (Invalid)
101       return "<INVALID>";
102     return std::string(Str);
103   }
104 
105   FileSystemOptions FileMgrOpts;
106   FileManager FileMgr;
107   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
108   DiagnosticsEngine Diags;
109   SourceManager SourceMgr;
110   LangOptions LangOpts;
111   std::shared_ptr<TargetOptions> TargetOpts;
112   IntrusiveRefCntPtr<TargetInfo> Target;
113   std::unique_ptr<Preprocessor> PP;
114 };
115 
// Lexes "M(f(M(i)))" and expects the source text of the token that came from
// the inner invocation to be the whole inner call, "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
}
129 
// Lexes "M(M(i) c)" and expects the source text of the first token (produced
// by the inner invocation) to be "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {tok::identifier,
                                                      tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
}
141 
// Lexes "M(c c M(i))" and expects the text spanning the second and third
// tokens to be "c M(i)", i.e. the range end is widened to the inner call.
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c c M(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
}
154 
// Lexes "M(M(i) c c)" and expects the text spanning the first and second
// tokens to be "M(i) c", i.e. the range begin is widened to the inner call.
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
}
167 
// Lexes two separate invocations, "M(c M(i)) M(M(i) c)", and expects a range
// that starts in the first and ends in the second to be reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c M(i)) M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
}
181 
// C(x) expands to M(x##c). Lexes "M(f(C(i)))" and expects the source text of
// the pasted identifier to be the invocation that produced it, "C(i)".
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) M(x##c)\n"
                                     "M(f(C(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
}
196 
// Lexes "f(M(M(i)))" and expects the source text of the innermost token to be
// widened through both nested invocations to "M(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "f(M(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
}
209 
// Lexes "M(f(i))" and expects the source text of a token in the middle of the
// macro argument to be just that token, "i".
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
}
222 
// M and C are both identity macros. Lexes "f(C(M(i)))" and expects the source
// text of the innermost token to be widened through both calls to "C(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
}
236 
// C(x) expands to "c x", so the argument is not the first token of C's
// expansion. Lexes "f(C(M(i)))" and expects the text to stop at "M(i)" rather
// than being widened to the enclosing C call.
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
251 
// C(x) expands to "c M(x)". Lexes "C(f(M(i)))" and expects the source text of
// the innermost token to be the invocation written in the argument, "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c M(x)\n"
                                     "C(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here instead of
  // indexing past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
266 
TEST_F(LexerTest,LexAPI)267 TEST_F(LexerTest, LexAPI) {
268   std::vector<tok::TokenKind> ExpectedTokens;
269   // Line 1 (after the #defines)
270   ExpectedTokens.push_back(tok::l_square);
271   ExpectedTokens.push_back(tok::identifier);
272   ExpectedTokens.push_back(tok::r_square);
273   ExpectedTokens.push_back(tok::l_square);
274   ExpectedTokens.push_back(tok::identifier);
275   ExpectedTokens.push_back(tok::r_square);
276   // Line 2
277   ExpectedTokens.push_back(tok::identifier);
278   ExpectedTokens.push_back(tok::identifier);
279   ExpectedTokens.push_back(tok::identifier);
280   ExpectedTokens.push_back(tok::identifier);
281 
282   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
283                                      "#define N(x) x\n"
284                                      "#define INN(x) x\n"
285                                      "#define NOF1 INN(val)\n"
286                                      "#define NOF2 val\n"
287                                      "M(foo) N([bar])\n"
288                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
289                                      ExpectedTokens);
290 
291   SourceLocation lsqrLoc = toks[0].getLocation();
292   SourceLocation idLoc = toks[1].getLocation();
293   SourceLocation rsqrLoc = toks[2].getLocation();
294   CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);
295 
296   SourceLocation Loc;
297   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
298   EXPECT_EQ(Loc, macroRange.getBegin());
299   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
300   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
301   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
302   EXPECT_EQ(Loc, macroRange.getEnd());
303   EXPECT_TRUE(macroRange.isTokenRange());
304 
305   CharSourceRange range = Lexer::makeFileCharRange(
306            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
307   EXPECT_TRUE(range.isInvalid());
308   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
309                                    SourceMgr, LangOpts);
310   EXPECT_TRUE(range.isInvalid());
311   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
312                                    SourceMgr, LangOpts);
313   EXPECT_TRUE(!range.isTokenRange());
314   EXPECT_EQ(range.getAsRange(),
315             SourceRange(macroRange.getBegin(),
316                         macroRange.getEnd().getLocWithOffset(1)));
317 
318   StringRef text = Lexer::getSourceText(
319                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
320                                SourceMgr, LangOpts);
321   EXPECT_EQ(text, "M(foo)");
322 
323   SourceLocation macroLsqrLoc = toks[3].getLocation();
324   SourceLocation macroIdLoc = toks[4].getLocation();
325   SourceLocation macroRsqrLoc = toks[5].getLocation();
326   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
327   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
328   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
329 
330   range = Lexer::makeFileCharRange(
331       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
332       SourceMgr, LangOpts);
333   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
334             range.getAsRange());
335 
336   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
337                                    SourceMgr, LangOpts);
338   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
339             range.getAsRange());
340 
341   macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
342   range = Lexer::makeFileCharRange(
343                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
344                      SourceMgr, LangOpts);
345   EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
346             range.getAsRange());
347 
348   text = Lexer::getSourceText(
349           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
350           SourceMgr, LangOpts);
351   EXPECT_EQ(text, "[bar");
352 
353 
354   SourceLocation idLoc1 = toks[6].getLocation();
355   SourceLocation idLoc2 = toks[7].getLocation();
356   SourceLocation idLoc3 = toks[8].getLocation();
357   SourceLocation idLoc4 = toks[9].getLocation();
358   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
359   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
360   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
361   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
362 }
363 
TEST_F(LexerTest,HandlesSplitTokens)364 TEST_F(LexerTest, HandlesSplitTokens) {
365   std::vector<tok::TokenKind> ExpectedTokens;
366   // Line 1 (after the #defines)
367   ExpectedTokens.push_back(tok::identifier);
368   ExpectedTokens.push_back(tok::less);
369   ExpectedTokens.push_back(tok::identifier);
370   ExpectedTokens.push_back(tok::less);
371   ExpectedTokens.push_back(tok::greatergreater);
372   // Line 2
373   ExpectedTokens.push_back(tok::identifier);
374   ExpectedTokens.push_back(tok::less);
375   ExpectedTokens.push_back(tok::identifier);
376   ExpectedTokens.push_back(tok::less);
377   ExpectedTokens.push_back(tok::greatergreater);
378 
379   std::vector<Token> toks = CheckLex("#define TY ty\n"
380                                      "#define RANGLE ty<ty<>>\n"
381                                      "TY<ty<>>\n"
382                                      "RANGLE",
383                                      ExpectedTokens);
384 
385   SourceLocation outerTyLoc = toks[0].getLocation();
386   SourceLocation innerTyLoc = toks[2].getLocation();
387   SourceLocation gtgtLoc = toks[4].getLocation();
388   // Split the token to simulate the action of the parser and force creation of
389   // an `ExpansionTokenRange`.
390   SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
391 
392   // Verify that it only captures the first greater-then and not the second one.
393   CharSourceRange range = Lexer::makeFileCharRange(
394       CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
395       LangOpts);
396   EXPECT_TRUE(range.isCharRange());
397   EXPECT_EQ(range.getAsRange(),
398             SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
399 
400   // Verify case where range begins in a macro expansion.
401   range = Lexer::makeFileCharRange(
402       CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
403       LangOpts);
404   EXPECT_TRUE(range.isCharRange());
405   EXPECT_EQ(range.getAsRange(),
406             SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
407                         gtgtLoc.getLocWithOffset(1)));
408 
409   SourceLocation macroInnerTyLoc = toks[7].getLocation();
410   SourceLocation macroGtgtLoc = toks[9].getLocation();
411   // Split the token to simulate the action of the parser and force creation of
412   // an `ExpansionTokenRange`.
413   SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
414 
415   // Verify that it fails (because it only captures the first greater-then and
416   // not the second one, so it doesn't span the entire macro expansion).
417   range = Lexer::makeFileCharRange(
418       CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
419       SourceMgr, LangOpts);
420   EXPECT_TRUE(range.isInvalid());
421 }
422 
// Checks the size of the FileID that holds the "helper1" macro argument of M2
// when M2 forwards its arguments to the variadic macro M1.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");

  // Lex() performs no checks of its own; fail cleanly instead of reading past
  // the end of toks if fewer tokens than expected were produced.
  ASSERT_GT(toks.size(), 20u);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
439 
// Builds a MacroArgs object by hand for a three-parameter function-like macro
// and checks that each argument stringifies to the expected string literal.
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Only the parameter count matters to this test; value-initialize so that
  // setParameterList copies null pointers rather than indeterminate values.
  std::array<IdentifierInfo *, 3> ParamList{};
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Separator token placed after every argument. Reset it first so that the
  // copies pushed below do not carry uninitialized fields.
  Token Eof;
  Eof.startToken();
  Eof.setKind(tok::eof);

  // Lex the comma-separated arguments, replacing each comma (and the final
  // end of file) with the eof separator.
  std::vector<Token> ArgTokens;
  for (;;) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs objects are released through destroy() rather than delete.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false, {}, {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  // Asking for a fourth argument is expected to trip an assertion.
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
488 
// Checks Lexer::isNewLineEscaped on the last character of a series of short
// buffers, covering escaped and unescaped line endings.
TEST_F(LexerTest, IsNewLineEscapedValid) {
  // Queries the final character of Text, using Text itself as buffer start.
  auto EndsWithEscapedNewLine = [](const char *Text) {
    const char *LastChar = Text + strlen(Text) - 1;
    return Lexer::isNewLineEscaped(Text, LastChar);
  };

  // Backslash followed by a line ending, optionally with whitespace between.
  EXPECT_TRUE(EndsWithEscapedNewLine("\\\r"));
  EXPECT_TRUE(EndsWithEscapedNewLine("\\\n"));
  EXPECT_TRUE(EndsWithEscapedNewLine("\\\r\n"));
  EXPECT_TRUE(EndsWithEscapedNewLine("\\\n\r"));
  EXPECT_TRUE(EndsWithEscapedNewLine("\\ \t\v\f\r"));
  EXPECT_TRUE(EndsWithEscapedNewLine("\\ \t\v\f\r\n"));

  // A second line ending after the escaped one, or no backslash at all.
  EXPECT_FALSE(EndsWithEscapedNewLine("\\\r\r"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\\\r\r\n"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\\\n\n"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\r"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\n"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\r\n"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\n\r"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\r\r"));
  EXPECT_FALSE(EndsWithEscapedNewLine("\n\n"));
}
511 
// For identifiers that contain escaped line endings, checks that
// Lexer::GetBeginningOfToken maps every offset inside the token back to the
// token's start.
TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  // Each line should have the same length for
  // further offset calculation to be more straightforward.
  const unsigned IdentifierLength = 8;
  std::string TextToLex = "rabarbar\n"
                          "foo\\\nbar\n"
                          "foo\\\rbar\n"
                          "fo\\\r\nbar\n"
                          "foo\\\n\rba\n";
  // Five identifier tokens, one per line of TextToLex.
  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);

  for (const Token &Tok : LexedTokens) {
    const SourceLocation TokBegin = Tok.getLocation();
    const unsigned ExpectedOffset = SourceMgr.getDecomposedLoc(TokBegin).second;

    for (unsigned Offset = 0; Offset != IdentifierLength; ++Offset) {
      const SourceLocation Probe = TokBegin.getLocWithOffset(Offset);
      const SourceLocation Found =
          Lexer::GetBeginningOfToken(Probe, SourceMgr, LangOpts);

      // The location returned by GetBeginningOfToken must be the token
      // location originally reported by the lexer.
      EXPECT_EQ(SourceMgr.getDecomposedExpansionLoc(Found).second,
                ExpectedOffset);
    }
  }
}
541 
// Lexes inputs that end in a backslash sequence inside a line comment or an
// #include directive; each is expected to yield no tokens.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  const std::vector<Token> AfterLineComment = Lex("  //  \\\n");
  EXPECT_TRUE(AfterLineComment.empty());

  const std::vector<Token> AfterUnterminatedInclude = Lex("#include <\\\\");
  EXPECT_TRUE(AfterUnterminatedInclude.empty());

  const std::vector<Token> AfterIncludeWithNewLine = Lex("#include <\\\\\n");
  EXPECT_TRUE(AfterIncludeWithNewLine.empty());
}
547 
// Checks both Lexer::Stringify overloads (the one returning std::string and
// the one rewriting a SmallVectorImpl<char> in place) on multi-line raw-string
// inputs containing quotes, backslashes and newlines.
TEST_F(LexerTest, StringizingRasString) {
  // Plain multi-line text with embedded double quotes.
  std::string Quoted = R"(foo
    {"bar":[]}
    baz)";
  SmallString<128> QuotedBuf;
  QuotedBuf += Quoted.c_str();

  // Corner cases: backslashes at line ends and before 'n'.
  std::string Backslashes = R"(\
    \n
    \\n
    \\)";
  SmallString<128> BackslashesBuf;
  BackslashesBuf += Backslashes.c_str();

  // A backslash followed by several consecutive newlines.
  std::string BlankLines = R"(a\


    \\b)";
  SmallString<128> BlankLinesBuf;
  BlankLinesBuf += BlankLines.c_str();

  // "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  Quoted = Lexer::Stringify(StringRef(Quoted));
  // "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  Lexer::Stringify(QuotedBuf);
  Backslashes = Lexer::Stringify(StringRef(Backslashes));
  Lexer::Stringify(BackslashesBuf);
  BlankLines = Lexer::Stringify(StringRef(BlankLines));
  Lexer::Stringify(BlankLinesBuf);

  EXPECT_EQ(Quoted, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(QuotedBuf, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(Backslashes, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(BackslashesBuf, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(BlankLines, R"(a\\\n\n\n    \\\\b)");
  EXPECT_EQ(BlankLinesBuf, R"(a\\\n\n\n    \\\\b)");
}
585 
// Checks that Lexer::getAsCharRange on a token range covering a single
// object-like macro use yields the full macro name.
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Lex() performs no checks of its own; fail cleanly instead of reading past
  // the end of toks if fewer tokens than expected were produced.
  ASSERT_GT(toks.size(), 5u);
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
604 
// Walks the main file with Lexer::findNextToken, starting at the beginning of
// the file, and checks the spelling of every token it returns before eof.
TEST_F(LexerTest, FindNextToken) {
  // Lex only to register the buffer as the main file; the tokens are unused.
  Lex("int abcd = 0;\n"
      "int xyz = abcd;\n");

  std::vector<std::string> Spellings;
  SourceLocation Cursor =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts);
    ASSERT_TRUE(Next.hasValue());
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    Cursor = Next->getLocation();
  }

  // The leading "int" is absent: the expected sequence starts with the token
  // that follows the one at the start of the file.
  EXPECT_THAT(Spellings, ElementsAre("abcd", "=", "0", ";", "int",
                                     "xyz", "=", "abcd", ";"));
}
622 
// Preprocesses an empty main file to completion and checks that exactly one
// FileID is recorded as created for the predefines buffer.
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("", ModLoader);

  // Drain the token stream; only the side effects on SourceMgr matter here.
  Token Tok;
  do {
    PP->Lex(Tok);
  } while (Tok.isNot(tok::eof));

  const FileID PredefinesFID = PP->getPredefinesFileID();
  EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PredefinesFID), 1U);
}
635 } // anonymous namespace
636