1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/AST/CommentLexer.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/Basic/CommentOptions.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticOptions.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "gtest/gtest.h"
18 #include <vector>
19
20 using namespace llvm;
21 using namespace clang;
22
23 namespace clang {
24 namespace comments {
25
26 namespace {
27 class CommentLexerTest : public ::testing::Test {
28 protected:
CommentLexerTest()29 CommentLexerTest()
30 : FileMgr(FileMgrOpts),
31 DiagID(new DiagnosticIDs()),
32 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
33 SourceMgr(Diags, FileMgr),
34 Traits(Allocator, CommentOptions()) {
35 }
36
37 FileSystemOptions FileMgrOpts;
38 FileManager FileMgr;
39 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
40 DiagnosticsEngine Diags;
41 SourceManager SourceMgr;
42 llvm::BumpPtrAllocator Allocator;
43 CommandTraits Traits;
44
45 void lexString(const char *Source, std::vector<Token> &Toks);
46
getCommandName(const Token & Tok)47 StringRef getCommandName(const Token &Tok) {
48 return Traits.getCommandInfo(Tok.getCommandID())->Name;
49 }
50
getVerbatimBlockName(const Token & Tok)51 StringRef getVerbatimBlockName(const Token &Tok) {
52 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
53 }
54
getVerbatimLineName(const Token & Tok)55 StringRef getVerbatimLineName(const Token &Tok) {
56 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
57 }
58 };
59
lexString(const char * Source,std::vector<Token> & Toks)60 void CommentLexerTest::lexString(const char *Source,
61 std::vector<Token> &Toks) {
62 std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Source);
63 FileID File = SourceMgr.createFileID(std::move(Buf));
64 SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
65
66 Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
67
68 while (1) {
69 Token Tok;
70 L.lex(Tok);
71 if (Tok.is(tok::eof))
72 break;
73 Toks.push_back(Tok);
74 }
75 }
76
77 } // unnamed namespace
78
79 // Empty source range should be handled.
TEST_F(CommentLexerTest, Basic1) {
  // Lexing an empty string must not produce any tokens.
  std::vector<Token> Tokens;
  lexString("", Tokens);

  ASSERT_EQ(0U, Tokens.size());
}
88
89 // Empty comments should be handled.
TEST_F(CommentLexerTest, Basic2) {
  // Each empty line-comment form is expected to lex to one newline token.
  const char *Sources[] = {
    "//", "///", "//!", "///<", "//!<"
  };
  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(1U, Tokens.size());
    ASSERT_EQ(tok::newline, Tokens[0].getKind());
  }
}
104
105 // Empty comments should be handled.
TEST_F(CommentLexerTest, Basic3) {
  // Each empty block-comment form is expected to lex to two newline tokens.
  const char *Sources[] = {
    "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
  };
  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());
    ASSERT_EQ(tok::newline, Tokens[0].getKind());
    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
121
122 // Single comment with plain text.
TEST_F(CommentLexerTest, Basic4) {
  // A line comment holding plain text, with and without a line terminator,
  // is expected to give one text token followed by one newline token.
  const char *Sources[] = {
    "// Meow", "/// Meow", "//! Meow",
    "// Meow\n", "// Meow\r\n", "//! Meow\r",
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
142
143 // Single comment with plain text.
TEST_F(CommentLexerTest, Basic5) {
  // A block comment holding plain text is expected to give one text token
  // followed by two newline tokens.
  const char *Sources[] = {
    "/* Meow*/", "/** Meow*/", "/*! Meow*/"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
163
164 // Test newline escaping.
TEST_F(CommentLexerTest, Basic6) {
  // The same three-line input with \n, \r\n and \r line endings.  The "?" "?/"
  // split keeps the compiler from seeing a trigraph in the literal.
  const char *Sources[] = {
    "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n",
    "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
    "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(10U, Tokens.size());

    // First line: text, a lone backslash, newline.
    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
    ASSERT_EQ(tok::text, Tokens[1].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[1].getText());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());

    // Second line: text, backslash, trailing space, newline.
    ASSERT_EQ(tok::text, Tokens[3].getKind());
    ASSERT_EQ(StringRef(" Bbb"), Tokens[3].getText());
    ASSERT_EQ(tok::text, Tokens[4].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[4].getText());
    ASSERT_EQ(tok::text, Tokens[5].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[5].getText());
    ASSERT_EQ(tok::newline, Tokens[6].getKind());

    // Third line: the "??/" sequence stays inside the text token.
    ASSERT_EQ(tok::text, Tokens[7].getKind());
    ASSERT_EQ(StringRef(" Ccc?" "?/"), Tokens[7].getText());
    ASSERT_EQ(tok::newline, Tokens[8].getKind());

    ASSERT_EQ(tok::newline, Tokens[9].getKind());
  }
}
200
201 // Check that we skip C-style aligned stars correctly.
TEST_F(CommentLexerTest, Basic7) {
  // Block comment whose continuation lines start with various decorations.
  const char *Input =
    "/* Aaa\n"
    " * Bbb\r\n"
    "\t* Ccc\n"
    " ! Ddd\n"
    " * Eee\n"
    " ** Fff\n"
    " */";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(15U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
  ASSERT_EQ(tok::newline, Tokens[1].getKind());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Ccc"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  // A leading '!' is expected to stay in the text.
  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" ! Ddd"), Tokens[6].getText());
  ASSERT_EQ(tok::newline, Tokens[7].getKind());

  ASSERT_EQ(tok::text, Tokens[8].getKind());
  ASSERT_EQ(StringRef(" Eee"), Tokens[8].getText());
  ASSERT_EQ(tok::newline, Tokens[9].getKind());

  // Of a doubled star, only the first one is expected to be dropped.
  ASSERT_EQ(tok::text, Tokens[10].getKind());
  ASSERT_EQ(StringRef("* Fff"), Tokens[10].getText());
  ASSERT_EQ(tok::newline, Tokens[11].getKind());

  ASSERT_EQ(tok::text, Tokens[12].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[12].getText());

  ASSERT_EQ(tok::newline, Tokens[13].getKind());
  ASSERT_EQ(tok::newline, Tokens[14].getKind());
}
247
248 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand1) {
  // A bare '@' at the end of a line comment is expected to lex as text.
  const char *Sources[] = { "//@", "///@", "//!@" };
  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("@"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
264
265 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand2) {
  // A bare '@' right before the end of a block comment is expected to lex as
  // text.
  const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("@"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
282
283 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand3) {
  // A bare backslash right before the end of a block comment is expected to
  // lex as text.
  const char *Sources[] = { "/*\\*/", "/**\\*/" };
  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
300
301 // Doxygen escape sequences.
TEST_F(CommentLexerTest, DoxygenCommand4) {
  // The same escape sequences spelled with a backslash and with '@'.
  const char *Sources[] = {
    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
    "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
  };
  // Expected text per token position; the entry for a non-text token is not
  // compared.
  const char *Text[] = {
    " ",
    "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
    "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ",
    "::", ""
  };

  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    std::vector<Token> Toks;

    lexString(Sources[i], Toks);

    ASSERT_EQ(array_lengthof(Text), Toks.size()) << "source index " << i;

    // Use a distinct bound name so the outer loop's 'e' is not shadowed, and
    // report both indices so a failure pinpoints the offending token.
    for (size_t j = 0, NumToks = Toks.size(); j != NumToks; j++) {
      if (Toks[j].is(tok::text)) {
        ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
          << "source index " << i << ", token index " << j;
      }
    }
  }
}
329
330 // A command marker followed by a non-letter that is not a part of an escape
331 // sequence.
TEST_F(CommentLexerTest, DoxygenCommand5) {
  // Backslashes followed by '^' and '0': each backslash is expected to come
  // out as its own text token.
  std::vector<Token> Tokens;
  lexString("/// \\^ \\0", Tokens);

  ASSERT_EQ(6U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("\\"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("^ "), Tokens[2].getText());

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef("\\"), Tokens[3].getText());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef("0"), Tokens[4].getText());

  ASSERT_EQ(tok::newline, Tokens[5].getKind());
}
357
TEST_F(CommentLexerTest, DoxygenCommand6) {
  // A \brief command followed by text.
  std::vector<Token> Tokens;
  lexString("/// \\brief Aaa.", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("brief"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
377
TEST_F(CommentLexerTest, DoxygenCommand7) {
  // Four \em commands: adjacent, separated by a space, and by a tab.
  std::vector<Token> Tokens;
  lexString("/// \\em\\em \\em\t\\em\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::backslash_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[2]));

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[6]));

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
409
TEST_F(CommentLexerTest, DoxygenCommand8) {
  // Four @em commands: adjacent, separated by a space, and by a tab.
  std::vector<Token> Tokens;
  lexString("/// @em@em @em\t@em\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::at_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::at_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[2]));

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::at_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::at_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[6]));

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
441
TEST_F(CommentLexerTest, DoxygenCommand9) {
  // Unregistered command names are expected to lex as unknown_command tokens
  // that carry the spelled name.
  std::vector<Token> Tokens;
  lexString("/// \\aaa\\bbb \\ccc\t\\ddd\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("aaa"), Tokens[1].getUnknownCommandName());

  ASSERT_EQ(tok::unknown_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("bbb"), Tokens[2].getUnknownCommandName());

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("ccc"), Tokens[4].getUnknownCommandName());

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("ddd"), Tokens[6].getUnknownCommandName());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
473
TEST_F(CommentLexerTest, DoxygenCommand10) {
  // A \c command immediately followed by the end of the line.
  std::vector<Token> Tokens;
  lexString("// \\c\n", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("c"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
490
TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
  // Register the command first, then use it with both command markers.
  Traits.registerBlockCommand(StringRef("NewBlockCommand"));

  const char *Input =
    "/// \\NewBlockCommand Aaa.\n"
    "/// @NewBlockCommand Aaa.\n";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(8U, Tokens.size());

  // Line spelled with a backslash.
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  // Line spelled with '@'.
  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[4].getText());

  ASSERT_EQ(tok::at_command, Tokens[5].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Tokens[5]));

  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[6].getText());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
526
TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
  // Three custom commands, registered in the order they appear in the input.
  Traits.registerBlockCommand(StringRef("Foo"));
  Traits.registerBlockCommand(StringRef("Bar"));
  Traits.registerBlockCommand(StringRef("Blech"));

  const char *Input =
    "/// \\Foo\n"
    "/// \\Bar Baz\n"
    "/// \\Blech quux=corge\n";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(11U, Tokens.size());

  // \Foo has no trailing text.
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("Foo"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());

  // \Bar is followed by a word.
  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("Bar"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef(" Baz"), Tokens[5].getText());

  ASSERT_EQ(tok::newline, Tokens[6].getKind());

  // \Blech is followed by text containing '='.
  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[7].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[8].getKind());
  ASSERT_EQ(StringRef("Blech"), getCommandName(Tokens[8]));

  ASSERT_EQ(tok::text, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" quux=corge"), Tokens[9].getText());

  ASSERT_EQ(tok::newline, Tokens[10].getKind());
}
573
574 // Empty verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock1) {
  // \verbatim immediately closed by \endverbatim, in both comment styles.
  const char *Sources[] = {
    "/// \\verbatim\\endverbatim\n//",
    "/** \\verbatim\\endverbatim*/"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_end, Tokens[2].getKind());
    ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[2]));

    ASSERT_EQ(tok::newline, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
601
602 // Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock2) {
  // An unterminated \verbatim at the end of a line comment.
  std::vector<Token> Tokens;
  lexString("/// \\verbatim", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
620
621 // Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock3) {
  // An unterminated \verbatim at the end of a block comment.
  std::vector<Token> Tokens;
  lexString("/** \\verbatim*/", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
640
641 // Single-line verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock4) {
  // A verbatim block opened and closed on one line, preceded by plain text.
  const char *Sources[] = {
    "/// Meow \\verbatim aaa \\endverbatim\n//",
    "/** Meow \\verbatim aaa \\endverbatim*/"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(6U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
    ASSERT_EQ(StringRef(" aaa "), Tokens[2].getVerbatimBlockText());

    ASSERT_EQ(tok::verbatim_block_end, Tokens[3].getKind());
    ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[3]));

    ASSERT_EQ(tok::newline, Tokens[4].getKind());
    ASSERT_EQ(tok::newline, Tokens[5].getKind());
  }
}
671
672 // Single-line verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock5) {
  // A one-line verbatim block that is never closed.
  const char *Sources[] = {
    "/// Meow \\verbatim aaa \n//",
    "/** Meow \\verbatim aaa */"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
    ASSERT_EQ(StringRef(" aaa "), Tokens[2].getVerbatimBlockText());

    ASSERT_EQ(tok::newline, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
699
TEST_F(CommentLexerTest, VerbatimBlock6) {
  // A verbatim block spread over several line comments, one of them empty.
  const char *Input =
    "// \\verbatim\n"
    "// Aaa\n"
    "//\n"
    "// Bbb\n"
    "// \\endverbatim\n";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(10U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::newline, Tokens[4].getKind());

  // The empty "//" line is expected to contribute only a newline token.
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[6].getVerbatimBlockText());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[8].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[8]));

  ASSERT_EQ(tok::newline, Tokens[9].getKind());
}
739
TEST_F(CommentLexerTest, VerbatimBlock7) {
  // A verbatim block inside a star-decorated block comment.
  const char *Input =
    "/* \\verbatim\n"
    " * Aaa\n"
    " *\n"
    " * Bbb\n"
    " * \\endverbatim\n"
    " */";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(10U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[2].getVerbatimBlockText());

  // The bare " *" line is expected to give an empty verbatim line.
  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef(""), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[4].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[5].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[5]));

  ASSERT_EQ(tok::newline, Tokens[6].getKind());

  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[7].getText());

  ASSERT_EQ(tok::newline, Tokens[8].getKind());
  ASSERT_EQ(tok::newline, Tokens[9].getKind());
}
781
782 // Complex test for verbatim blocks.
TEST_F(CommentLexerTest, VerbatimBlock8) {
  // Two verbatim blocks in one comment, with mixed line endings, escape-like
  // sequences inside the block and a misspelled end command.
  const char *Input =
    "/* Meow \\verbatim aaa\\$\\@\n"
    "bbb \\endverbati\r"
    "ccc\r\n"
    "ddd \\endverbatim Blah \\verbatim eee\n"
    "\\endverbatim BlahBlah*/";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(14U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

  // First block.
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" aaa\\$\\@"), Tokens[2].getVerbatimBlockText());

  // "\endverbati" is not the end command, so it stays in the line text.
  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef("bbb \\endverbati"), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[4].getKind());
  ASSERT_EQ(StringRef("ccc"), Tokens[4].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[5].getKind());
  ASSERT_EQ(StringRef("ddd "), Tokens[5].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[6].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[6]));

  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" Blah "), Tokens[7].getText());

  // Second block.
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[8].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[8]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" eee"), Tokens[9].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[10].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[10]));

  ASSERT_EQ(tok::text, Tokens[11].getKind());
  ASSERT_EQ(StringRef(" BlahBlah"), Tokens[11].getText());

  ASSERT_EQ(tok::newline, Tokens[12].getKind());
  ASSERT_EQ(tok::newline, Tokens[13].getKind());
}
835
836 // LaTeX verbatim blocks.
TEST_F(CommentLexerTest, VerbatimBlock9) {
  // The \f$...\f$, \f[...\f] and \f{...\f} pairs on a single line.
  std::vector<Token> Tokens;
  lexString("/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}", Tokens);

  ASSERT_EQ(13U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  // \f$ ... \f$
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa "), Tokens[2].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[3].getKind());
  ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Tokens[3]));

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[4].getText());

  // \f[ ... \f]
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[5].getKind());
  ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Tokens[5]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Bbb "), Tokens[6].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[7].getKind());
  ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Tokens[7]));

  ASSERT_EQ(tok::text, Tokens[8].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[8].getText());

  // \f{ ... \f}
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[9].getKind());
  ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Tokens[9]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[10].getKind());
  ASSERT_EQ(StringRef(" Ccc "), Tokens[10].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[11].getKind());
  ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Tokens[11]));

  ASSERT_EQ(tok::newline, Tokens[12].getKind());
}
884
885 // Empty verbatim line.
TEST_F(CommentLexerTest, VerbatimLine1) {
  // \fn with nothing after it, in both comment styles.
  const char *Sources[] = {
    "/// \\fn\n//",
    "/** \\fn*/"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(4U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_line_name, Tokens[1].getKind());
    ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Tokens[1]));

    ASSERT_EQ(tok::newline, Tokens[2].getKind());
    ASSERT_EQ(tok::newline, Tokens[3].getKind());
  }
}
909
910 // Verbatim line with Doxygen escape sequences, which should not be expanded.
TEST_F(CommentLexerTest, VerbatimLine2) {
  // The "\$" inside the \fn argument must reach the line text untouched.
  const char *Sources[] = {
    "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
    "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
  };

  for (const char *Input : Sources) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_line_name, Tokens[1].getKind());
    ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_line_text, Tokens[2].getKind());
    ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
              Tokens[2].getVerbatimLineText());

    ASSERT_EQ(tok::newline, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
938
939 // Verbatim line should not eat anything from next source line.
TEST_F(CommentLexerTest, VerbatimLine3) {
  // The \fn line text must stop at the end of its own line; the following
  // line is expected to lex as ordinary text.
  const char *Input =
    "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
    " * Meow\n"
    " */";
  std::vector<Token> Tokens;

  lexString(Input, Tokens);

  ASSERT_EQ(9U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_line_name, Tokens[1].getKind());
  ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Tokens[1]));

  ASSERT_EQ(tok::verbatim_line_text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
            Tokens[2].getVerbatimLineText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Meow"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[6].getText());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
  ASSERT_EQ(tok::newline, Tokens[8].getKind());
}
973
TEST_F(CommentLexerTest, HTML1) {
  // A lone '<' at the end of the comment is expected to lex as text.
  std::vector<Token> Tokens;
  lexString("// <", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
992
TEST_F(CommentLexerTest, HTML2) {
  // '<' followed by a digit is expected to lex as text, not as a tag.
  std::vector<Token> Tokens;
  lexString("// a<2", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" a"), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("2"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1014
TEST_F(CommentLexerTest, HTML3) {
  // Whitespace between '<' and the name keeps this from being a start tag:
  // the '<' and " img" come back as plain text.
  std::vector<Token> Tokens;
  lexString("// < img", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" img"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1036
TEST_F(CommentLexerTest, HTML4) {
  // "<img" at the end of input, with or without a trailing blank, produces
  // just the start-tag token.
  const char *Inputs[] = {
    "// <img",
    "// <img "
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
1059
TEST_F(CommentLexerTest, HTML5) {
  // "42" after the tag name is not an attribute identifier, so it is emitted
  // as a text token (without the separating blank).
  std::vector<Token> Tokens;
  lexString("// <img 42", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("42"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1081
TEST_F(CommentLexerTest, HTML6) {
  // A complete "<img>" yields start tag + html_greater; whatever follows the
  // '>' is ordinary text again.
  std::vector<Token> Tokens;
  lexString("// <img> Meow", Tokens);
  ASSERT_EQ(5U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
  ASSERT_EQ(tok::html_greater, Tokens[2].getKind());
  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" Meow"), Tokens[3].getText());
  ASSERT_EQ(tok::newline, Tokens[4].getKind());
}
1104
TEST_F(CommentLexerTest, HTML7) {
  // '=' directly after the tag name (no attribute name before it) is
  // reported as text, not as html_equals.
  std::vector<Token> Tokens;
  lexString("// <img=", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("="), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1125
TEST_F(CommentLexerTest, HTML8) {
  // Attribute name and '=' with the value missing: the '>' still closes the
  // tag and the remainder is lexed as text.
  std::vector<Token> Tokens;
  lexString("// <img src=> Meow", Tokens);
  ASSERT_EQ(7U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
  ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
  ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
  ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
  ASSERT_EQ(tok::html_greater, Tokens[4].getKind());
  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef(" Meow"), Tokens[5].getText());
  ASSERT_EQ(tok::newline, Tokens[6].getKind());
}
1153
TEST_F(CommentLexerTest, HTML9) {
  // Input stops right after an attribute name; a trailing blank makes no
  // difference to the token stream.
  const char *Inputs[] = {
    "// <img src",
    "// <img src "
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(4U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
    ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
    ASSERT_EQ(tok::newline, Tokens[3].getKind());
  }
}
1179
TEST_F(CommentLexerTest, HTML10) {
  // Input stops right after '='; a blank between the attribute name and the
  // '=' is accepted.
  const char *Inputs[] = {
    "// <img src=",
    "// <img src ="
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
    ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
    ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
1207
TEST_F(CommentLexerTest, HTML11) {
  // An opening quote (single or double) with nothing after it still produces
  // an html_quoted_string token, whose contents are empty.
  const char *Inputs[] = {
    "// <img src=\"",
    "// <img src = \"",
    "// <img src=\'",
    "// <img src = \'"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(6U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
    ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
    ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
    ASSERT_EQ(tok::html_quoted_string, Tokens[4].getKind());
    ASSERT_EQ(StringRef(""), Tokens[4].getHTMLQuotedString());
    ASSERT_EQ(tok::newline, Tokens[5].getKind());
  }
}
1240
TEST_F(CommentLexerTest, HTML12) {
  // '@' where an attribute value is expected is not a quoted string; it is
  // emitted as a text token.
  std::vector<Token> Tokens;
  lexString("// <img src=@", Tokens);
  ASSERT_EQ(6U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
  ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
  ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
  ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef("@"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());
}
1266
TEST_F(CommentLexerTest, HTML13) {
  // Backslash-escaped quotes inside the attribute value do not end it.  The
  // value is the same whether or not the closing quote is present, and for
  // both quote styles.
  const char *Inputs[] = {
    "// <img src=\"val\\\"\\'val",
    "// <img src=\"val\\\"\\'val\"",
    "// <img src=\'val\\\"\\'val",
    "// <img src=\'val\\\"\\'val\'"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(6U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
    ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
    ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
    ASSERT_EQ(tok::html_quoted_string, Tokens[4].getKind());
    ASSERT_EQ(StringRef("val\\\"\\'val"), Tokens[4].getHTMLQuotedString());
    ASSERT_EQ(tok::newline, Tokens[5].getKind());
  }
}
1299
TEST_F(CommentLexerTest, HTML14) {
  // A properly closed quoted value followed by '>' ends the tag with an
  // html_greater token.
  const char *Inputs[] = {
    "// <img src=\"val\\\"\\'val\">",
    "// <img src=\'val\\\"\\'val\'>"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(7U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
    ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());
    ASSERT_EQ(tok::html_equals, Tokens[3].getKind());
    ASSERT_EQ(tok::html_quoted_string, Tokens[4].getKind());
    ASSERT_EQ(StringRef("val\\\"\\'val"), Tokens[4].getHTMLQuotedString());
    ASSERT_EQ(tok::html_greater, Tokens[5].getKind());
    ASSERT_EQ(tok::newline, Tokens[6].getKind());
  }
}
1332
TEST_F(CommentLexerTest, HTML15) {
  // Self-closing form: "/>" is one html_slash_greater token, with or without
  // a blank in front of it.
  const char *Inputs[] = {
    "// <img/>",
    "// <img />"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(4U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::html_slash_greater, Tokens[2].getKind());
    ASSERT_EQ(tok::newline, Tokens[3].getKind());
  }
}
1357
TEST_F(CommentLexerTest, HTML16) {
  // A '/' that is not followed by '>' is just text, and so is everything
  // after it.
  const char *Inputs[] = {
    "// <img/ Aaa",
    "// <img / Aaa"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);
    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());
    ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
    ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());
    ASSERT_EQ(tok::text, Tokens[2].getKind());
    ASSERT_EQ(StringRef("/"), Tokens[2].getText());
    ASSERT_EQ(tok::text, Tokens[3].getKind());
    ASSERT_EQ(StringRef(" Aaa"), Tokens[3].getText());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
1386
TEST_F(CommentLexerTest, HTML17) {
  // "</" with no tag name after it is a single text token.
  std::vector<Token> Tokens;
  lexString("// </", Tokens);
  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</"), Tokens[1].getText());
  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1404
TEST_F(CommentLexerTest, HTML18) {
  // "</" followed by '@' (not a tag name): "</" and "@" are separate text
  // tokens.
  std::vector<Token> Tokens;
  lexString("// </@", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("@"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1425
TEST_F(CommentLexerTest, HTML19) {
  // "</img" without the closing '>' is already reported as an end tag.
  std::vector<Token> Tokens;
  lexString("// </img", Tokens);
  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::html_end_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagEndName());
  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1443
TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
  // "<tag>" is not lexed as a start tag: "<tag" and ">" both come back as
  // plain text.
  std::vector<Token> Tokens;
  lexString("// <tag>", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<tag"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(">"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1464
TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
  // "</tag>" is not lexed as an end tag: "</tag" and ">" both come back as
  // plain text.
  std::vector<Token> Tokens;
  lexString("// </tag>", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</tag"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(">"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1485
TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
  // A bare '&' at the end of the comment is its own text token.
  std::vector<Token> Tokens;
  lexString("// &", Tokens);
  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());
  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1503
TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
  // '&' followed by '!' (which cannot start a reference name): the two
  // characters are separate text tokens.
  std::vector<Token> Tokens;
  lexString("// &!", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("!"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1524
TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
  // Named reference with the terminating ';' missing.  The input has to
  // spell out "&amp" (a bare "&" would only repeat
  // HTMLCharacterReferences1); the unterminated reference is expected to be
  // kept verbatim as a single text token rather than resolved.
  const char *Source = "// &amp";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,        Toks[1].getKind());
  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());

  ASSERT_EQ(tok::newline,  Toks[2].getKind());
}
1542
TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
  // Unterminated named reference followed by '!'.  The input has to spell
  // out "&amp!" (a bare "&!" would only repeat HTMLCharacterReferences2);
  // "&amp" is expected to stay verbatim and the '!' to follow as its own
  // text token.
  const char *Source = "// &amp!";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,        Toks[1].getKind());
  ASSERT_EQ(StringRef("&amp"), Toks[1].getText());

  ASSERT_EQ(tok::text,     Toks[2].getKind());
  ASSERT_EQ(StringRef("!"), Toks[2].getText());

  ASSERT_EQ(tok::newline,  Toks[3].getKind());
}
1563
TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
  // "&#" with no digits after it is kept together as one text token.
  std::vector<Token> Tokens;
  lexString("// &#", Tokens);
  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#"), Tokens[1].getText());
  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1581
TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
  // "&#" followed by a letter instead of a digit: "&#" is one text token and
  // the letter is another.
  std::vector<Token> Tokens;
  lexString("// &#a", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("a"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1602
TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
  // Decimal reference with the terminating ';' missing.  The input has to
  // contain the reference itself ("&#42"), not the character it denotes;
  // the unterminated reference is expected to be kept verbatim as a single
  // text token.
  const char *Source = "// &#42";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,        Toks[1].getKind());
  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());

  ASSERT_EQ(tok::newline,  Toks[2].getKind());
}
1620
TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
  // Unterminated decimal reference followed by a letter.  The input has to
  // contain the reference itself ("&#42a"); the digits are expected to stay
  // attached to "&#" and the letter to follow as its own text token.
  const char *Source = "// &#42a";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,        Toks[1].getKind());
  ASSERT_EQ(StringRef("&#42"), Toks[1].getText());

  ASSERT_EQ(tok::text,     Toks[2].getKind());
  ASSERT_EQ(StringRef("a"), Toks[2].getText());

  ASSERT_EQ(tok::newline,  Toks[3].getKind());
}
1641
TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
  // "&#x" with no hex digits after it is kept together as one text token.
  std::vector<Token> Tokens;
  lexString("// &#x", Tokens);
  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#x"), Tokens[1].getText());
  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1659
TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
  // "&#x" followed by 'z' (not a hex digit): "&#x" is one text token and the
  // 'z' is another.
  std::vector<Token> Tokens;
  lexString("// &#xz", Tokens);
  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#x"), Tokens[1].getText());
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("z"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1680
TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
  // Hexadecimal reference with the terminating ';' missing.  The input has
  // to contain the ASCII reference ("&#xab"), not the non-ASCII character it
  // denotes; the unterminated reference is expected to be kept verbatim as a
  // single text token.
  const char *Source = "// &#xab";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,         Toks[1].getKind());
  ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());

  ASSERT_EQ(tok::newline,  Toks[2].getKind());
}
1698
TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
  // Unterminated hexadecimal reference (mixed-case digits) followed by 'z',
  // which is not a hex digit.  The input has to contain the ASCII reference
  // ("&#xaBz"); the hex digits are expected to stay attached to "&#x" and
  // the 'z' to follow as its own text token.
  const char *Source = "// &#xaBz";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,         Toks[1].getKind());
  ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());

  ASSERT_EQ(tok::text,     Toks[2].getKind());
  ASSERT_EQ(StringRef("z"), Toks[2].getText());

  ASSERT_EQ(tok::newline,  Toks[3].getKind());
}
1719
TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
  // Properly terminated named reference.  The input must spell it out as
  // "&amp;" (a bare "&" would only repeat HTMLCharacterReferences1); the
  // resulting text token is expected to carry the resolved character "&".
  const char *Source = "// &amp;";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(3U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,     Toks[1].getKind());
  ASSERT_EQ(StringRef("&"), Toks[1].getText());

  ASSERT_EQ(tok::newline,  Toks[2].getKind());
}
1737
TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
  // Two terminated named references back to back.  The input must spell
  // them out ("&amp;&lt;"); each is expected to become its own text token
  // holding the resolved character.
  const char *Source = "// &amp;&lt;";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,     Toks[1].getKind());
  ASSERT_EQ(StringRef("&"), Toks[1].getText());

  ASSERT_EQ(tok::text,     Toks[2].getKind());
  ASSERT_EQ(StringRef("<"), Toks[2].getText());

  ASSERT_EQ(tok::newline,  Toks[3].getKind());
}
1758
TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
  // Terminated named reference followed by ordinary text.  The input must
  // spell the reference out ("&amp;"); the resolved "&" is expected as one
  // text token and " meow" as the next.
  const char *Source = "// &amp; meow";

  std::vector<Token> Toks;

  lexString(Source, Toks);

  ASSERT_EQ(4U, Toks.size());

  ASSERT_EQ(tok::text,     Toks[0].getKind());
  ASSERT_EQ(StringRef(" "), Toks[0].getText());

  ASSERT_EQ(tok::text,     Toks[1].getKind());
  ASSERT_EQ(StringRef("&"), Toks[1].getText());

  ASSERT_EQ(tok::text,         Toks[2].getKind());
  ASSERT_EQ(StringRef(" meow"), Toks[2].getText());

  ASSERT_EQ(tok::newline,  Toks[3].getKind());
}
1779
TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
  // Four spellings of the same code point (U+003D, '='): decimal, and
  // hexadecimal with each combination of 'x'/'X' prefix and lower/upper-case
  // digits.  Every spelling is expected to resolve to a text token "=".
  // The inputs must be distinct references; four copies of a literal "="
  // would exercise nothing.
  const char *Sources[] = {
    "// &#61;",
    "// &#x3d;",
    "// &#X3d;",
    "// &#X3D;"
  };

  for (const char *Source : Sources) {
    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(3U, Toks.size());

    ASSERT_EQ(tok::text,     Toks[0].getKind());
    ASSERT_EQ(StringRef(" "), Toks[0].getText());

    ASSERT_EQ(tok::text,     Toks[1].getKind());
    ASSERT_EQ(StringRef("="), Toks[1].getText());

    ASSERT_EQ(tok::newline,  Toks[2].getKind());
  }
}
1804
TEST_F(CommentLexerTest, MultipleComments) {
  // Four comments in one buffer: a "//" line, a "///" line, a two-line
  // "/* ... */" block and a one-line "/** ... */" block.
  const char *Input = "// Aaa\n"
                      "/// Bbb\n"
                      "/* Ccc\n"
                      " * Ddd*/\n"
                      "/** Eee*/";
  std::vector<Token> Tokens;
  lexString(Input, Tokens);
  ASSERT_EQ(12U, Tokens.size());

  // "// Aaa"
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
  ASSERT_EQ(tok::newline, Tokens[1].getKind());

  // "/// Bbb"
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  // "/* Ccc" / " * Ddd*/": the last line's text is followed by two newline
  // tokens.
  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Ccc"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());
  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Ddd"), Tokens[6].getText());
  ASSERT_EQ(tok::newline, Tokens[7].getKind());
  ASSERT_EQ(tok::newline, Tokens[8].getKind());

  // "/** Eee*/": again two newline tokens after the text.
  ASSERT_EQ(tok::text, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" Eee"), Tokens[9].getText());
  ASSERT_EQ(tok::newline, Tokens[10].getKind());
  ASSERT_EQ(tok::newline, Tokens[11].getKind());
}
1842
1843 } // end namespace comments
1844 } // end namespace clang
1845
1846