1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/AST/CommentParser.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/AST/CommentDiagnostic.h"
12 #include "clang/AST/CommentSema.h"
13 #include "clang/Basic/CharInfo.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
16 
17 namespace clang {
18 
isWhitespace(llvm::StringRef S)19 static inline bool isWhitespace(llvm::StringRef S) {
20   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21     if (!isWhitespace(*I))
22       return false;
23   }
24   return true;
25 }
26 
27 namespace comments {
28 
29 /// Re-lexes a sequence of tok::text tokens.
30 class TextTokenRetokenizer {
31   llvm::BumpPtrAllocator &Allocator;
32   Parser &P;
33 
34   /// This flag is set when there are no more tokens we can fetch from lexer.
35   bool NoMoreInterestingTokens;
36 
37   /// Token buffer: tokens we have processed and lookahead.
38   SmallVector<Token, 16> Toks;
39 
40   /// A position in \c Toks.
41   struct Position {
42     const char *BufferStart;
43     const char *BufferEnd;
44     const char *BufferPtr;
45     SourceLocation BufferStartLoc;
46     unsigned CurToken;
47   };
48 
49   /// Current position in Toks.
50   Position Pos;
51 
isEnd() const52   bool isEnd() const {
53     return Pos.CurToken >= Toks.size();
54   }
55 
56   /// Sets up the buffer pointers to point to current token.
setupBuffer()57   void setupBuffer() {
58     assert(!isEnd());
59     const Token &Tok = Toks[Pos.CurToken];
60 
61     Pos.BufferStart = Tok.getText().begin();
62     Pos.BufferEnd = Tok.getText().end();
63     Pos.BufferPtr = Pos.BufferStart;
64     Pos.BufferStartLoc = Tok.getLocation();
65   }
66 
getSourceLocation() const67   SourceLocation getSourceLocation() const {
68     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70   }
71 
peek() const72   char peek() const {
73     assert(!isEnd());
74     assert(Pos.BufferPtr != Pos.BufferEnd);
75     return *Pos.BufferPtr;
76   }
77 
consumeChar()78   void consumeChar() {
79     assert(!isEnd());
80     assert(Pos.BufferPtr != Pos.BufferEnd);
81     Pos.BufferPtr++;
82     if (Pos.BufferPtr == Pos.BufferEnd) {
83       Pos.CurToken++;
84       if (isEnd() && !addToken())
85         return;
86 
87       assert(!isEnd());
88       setupBuffer();
89     }
90   }
91 
92   /// Add a token.
93   /// Returns true on success, false if there are no interesting tokens to
94   /// fetch from lexer.
addToken()95   bool addToken() {
96     if (NoMoreInterestingTokens)
97       return false;
98 
99     if (P.Tok.is(tok::newline)) {
100       // If we see a single newline token between text tokens, skip it.
101       Token Newline = P.Tok;
102       P.consumeToken();
103       if (P.Tok.isNot(tok::text)) {
104         P.putBack(Newline);
105         NoMoreInterestingTokens = true;
106         return false;
107       }
108     }
109     if (P.Tok.isNot(tok::text)) {
110       NoMoreInterestingTokens = true;
111       return false;
112     }
113 
114     Toks.push_back(P.Tok);
115     P.consumeToken();
116     if (Toks.size() == 1)
117       setupBuffer();
118     return true;
119   }
120 
consumeWhitespace()121   void consumeWhitespace() {
122     while (!isEnd()) {
123       if (isWhitespace(peek()))
124         consumeChar();
125       else
126         break;
127     }
128   }
129 
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)130   void formTokenWithChars(Token &Result,
131                           SourceLocation Loc,
132                           const char *TokBegin,
133                           unsigned TokLength,
134                           StringRef Text) {
135     Result.setLocation(Loc);
136     Result.setKind(tok::text);
137     Result.setLength(TokLength);
138 #ifndef NDEBUG
139     Result.TextPtr = "<UNSET>";
140     Result.IntVal = 7;
141 #endif
142     Result.setText(Text);
143   }
144 
145 public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)146   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
147       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
148     Pos.CurToken = 0;
149     addToken();
150   }
151 
152   /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)153   bool lexWord(Token &Tok) {
154     if (isEnd())
155       return false;
156 
157     Position SavedPos = Pos;
158 
159     consumeWhitespace();
160     SmallString<32> WordText;
161     const char *WordBegin = Pos.BufferPtr;
162     SourceLocation Loc = getSourceLocation();
163     while (!isEnd()) {
164       const char C = peek();
165       if (!isWhitespace(C)) {
166         WordText.push_back(C);
167         consumeChar();
168       } else
169         break;
170     }
171     const unsigned Length = WordText.size();
172     if (Length == 0) {
173       Pos = SavedPos;
174       return false;
175     }
176 
177     char *TextPtr = Allocator.Allocate<char>(Length + 1);
178 
179     memcpy(TextPtr, WordText.c_str(), Length + 1);
180     StringRef Text = StringRef(TextPtr, Length);
181 
182     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
183     return true;
184   }
185 
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)186   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
187     if (isEnd())
188       return false;
189 
190     Position SavedPos = Pos;
191 
192     consumeWhitespace();
193     SmallString<32> WordText;
194     const char *WordBegin = Pos.BufferPtr;
195     SourceLocation Loc = getSourceLocation();
196     bool Error = false;
197     if (!isEnd()) {
198       const char C = peek();
199       if (C == OpenDelim) {
200         WordText.push_back(C);
201         consumeChar();
202       } else
203         Error = true;
204     }
205     char C = '\0';
206     while (!Error && !isEnd()) {
207       C = peek();
208       WordText.push_back(C);
209       consumeChar();
210       if (C == CloseDelim)
211         break;
212     }
213     if (!Error && C != CloseDelim)
214       Error = true;
215 
216     if (Error) {
217       Pos = SavedPos;
218       return false;
219     }
220 
221     const unsigned Length = WordText.size();
222     char *TextPtr = Allocator.Allocate<char>(Length + 1);
223 
224     memcpy(TextPtr, WordText.c_str(), Length + 1);
225     StringRef Text = StringRef(TextPtr, Length);
226 
227     formTokenWithChars(Tok, Loc, WordBegin,
228                        Pos.BufferPtr - WordBegin, Text);
229     return true;
230   }
231 
232   /// Put back tokens that we didn't consume.
putBackLeftoverTokens()233   void putBackLeftoverTokens() {
234     if (isEnd())
235       return;
236 
237     bool HavePartialTok = false;
238     Token PartialTok;
239     if (Pos.BufferPtr != Pos.BufferStart) {
240       formTokenWithChars(PartialTok, getSourceLocation(),
241                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242                          StringRef(Pos.BufferPtr,
243                                    Pos.BufferEnd - Pos.BufferPtr));
244       HavePartialTok = true;
245       Pos.CurToken++;
246     }
247 
248     P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249     Pos.CurToken = Toks.size();
250 
251     if (HavePartialTok)
252       P.putBack(PartialTok);
253   }
254 };
255 
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)256 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
257                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
258                const CommandTraits &Traits):
259     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
260     Traits(Traits) {
261   consumeToken();
262 }
263 
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)264 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265                                    TextTokenRetokenizer &Retokenizer) {
266   Token Arg;
267   // Check if argument looks like direction specification: [dir]
268   // e.g., [in], [out], [in,out]
269   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
270     S.actOnParamCommandDirectionArg(PC,
271                                     Arg.getLocation(),
272                                     Arg.getEndLocation(),
273                                     Arg.getText());
274 
275   if (Retokenizer.lexWord(Arg))
276     S.actOnParamCommandParamNameArg(PC,
277                                     Arg.getLocation(),
278                                     Arg.getEndLocation(),
279                                     Arg.getText());
280 }
281 
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)282 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283                                     TextTokenRetokenizer &Retokenizer) {
284   Token Arg;
285   if (Retokenizer.lexWord(Arg))
286     S.actOnTParamCommandParamNameArg(TPC,
287                                      Arg.getLocation(),
288                                      Arg.getEndLocation(),
289                                      Arg.getText());
290 }
291 
parseBlockCommandArgs(BlockCommandComment * BC,TextTokenRetokenizer & Retokenizer,unsigned NumArgs)292 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
293                                    TextTokenRetokenizer &Retokenizer,
294                                    unsigned NumArgs) {
295   typedef BlockCommandComment::Argument Argument;
296   Argument *Args =
297       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
298   unsigned ParsedArgs = 0;
299   Token Arg;
300   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
301     Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
302                                             Arg.getEndLocation()),
303                                 Arg.getText());
304     ParsedArgs++;
305   }
306 
307   S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
308 }
309 
parseBlockCommand()310 BlockCommandComment *Parser::parseBlockCommand() {
311   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
312 
313   ParamCommandComment *PC = nullptr;
314   TParamCommandComment *TPC = nullptr;
315   BlockCommandComment *BC = nullptr;
316   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
317   CommandMarkerKind CommandMarker =
318       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
319   if (Info->IsParamCommand) {
320     PC = S.actOnParamCommandStart(Tok.getLocation(),
321                                   Tok.getEndLocation(),
322                                   Tok.getCommandID(),
323                                   CommandMarker);
324   } else if (Info->IsTParamCommand) {
325     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
326                                     Tok.getEndLocation(),
327                                     Tok.getCommandID(),
328                                     CommandMarker);
329   } else {
330     BC = S.actOnBlockCommandStart(Tok.getLocation(),
331                                   Tok.getEndLocation(),
332                                   Tok.getCommandID(),
333                                   CommandMarker);
334   }
335   consumeToken();
336 
337   if (isTokBlockCommand()) {
338     // Block command ahead.  We can't nest block commands, so pretend that this
339     // command has an empty argument.
340     ParagraphComment *Paragraph = S.actOnParagraphComment(None);
341     if (PC) {
342       S.actOnParamCommandFinish(PC, Paragraph);
343       return PC;
344     } else if (TPC) {
345       S.actOnTParamCommandFinish(TPC, Paragraph);
346       return TPC;
347     } else {
348       S.actOnBlockCommandFinish(BC, Paragraph);
349       return BC;
350     }
351   }
352 
353   if (PC || TPC || Info->NumArgs > 0) {
354     // In order to parse command arguments we need to retokenize a few
355     // following text tokens.
356     TextTokenRetokenizer Retokenizer(Allocator, *this);
357 
358     if (PC)
359       parseParamCommandArgs(PC, Retokenizer);
360     else if (TPC)
361       parseTParamCommandArgs(TPC, Retokenizer);
362     else
363       parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
364 
365     Retokenizer.putBackLeftoverTokens();
366   }
367 
368   // If there's a block command ahead, we will attach an empty paragraph to
369   // this command.
370   bool EmptyParagraph = false;
371   if (isTokBlockCommand())
372     EmptyParagraph = true;
373   else if (Tok.is(tok::newline)) {
374     Token PrevTok = Tok;
375     consumeToken();
376     EmptyParagraph = isTokBlockCommand();
377     putBack(PrevTok);
378   }
379 
380   ParagraphComment *Paragraph;
381   if (EmptyParagraph)
382     Paragraph = S.actOnParagraphComment(None);
383   else {
384     BlockContentComment *Block = parseParagraphOrBlockCommand();
385     // Since we have checked for a block command, we should have parsed a
386     // paragraph.
387     Paragraph = cast<ParagraphComment>(Block);
388   }
389 
390   if (PC) {
391     S.actOnParamCommandFinish(PC, Paragraph);
392     return PC;
393   } else if (TPC) {
394     S.actOnTParamCommandFinish(TPC, Paragraph);
395     return TPC;
396   } else {
397     S.actOnBlockCommandFinish(BC, Paragraph);
398     return BC;
399   }
400 }
401 
parseInlineCommand()402 InlineCommandComment *Parser::parseInlineCommand() {
403   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
404 
405   const Token CommandTok = Tok;
406   consumeToken();
407 
408   TextTokenRetokenizer Retokenizer(Allocator, *this);
409 
410   Token ArgTok;
411   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
412 
413   InlineCommandComment *IC;
414   if (ArgTokValid) {
415     IC = S.actOnInlineCommand(CommandTok.getLocation(),
416                               CommandTok.getEndLocation(),
417                               CommandTok.getCommandID(),
418                               ArgTok.getLocation(),
419                               ArgTok.getEndLocation(),
420                               ArgTok.getText());
421   } else {
422     IC = S.actOnInlineCommand(CommandTok.getLocation(),
423                               CommandTok.getEndLocation(),
424                               CommandTok.getCommandID());
425 
426     Diag(CommandTok.getEndLocation().getLocWithOffset(1),
427          diag::warn_doc_inline_contents_no_argument)
428         << CommandTok.is(tok::at_command)
429         << Traits.getCommandInfo(CommandTok.getCommandID())->Name
430         << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
431   }
432 
433   Retokenizer.putBackLeftoverTokens();
434 
435   return IC;
436 }
437 
parseHTMLStartTag()438 HTMLStartTagComment *Parser::parseHTMLStartTag() {
439   assert(Tok.is(tok::html_start_tag));
440   HTMLStartTagComment *HST =
441       S.actOnHTMLStartTagStart(Tok.getLocation(),
442                                Tok.getHTMLTagStartName());
443   consumeToken();
444 
445   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
446   while (true) {
447     switch (Tok.getKind()) {
448     case tok::html_ident: {
449       Token Ident = Tok;
450       consumeToken();
451       if (Tok.isNot(tok::html_equals)) {
452         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
453                                                        Ident.getHTMLIdent()));
454         continue;
455       }
456       Token Equals = Tok;
457       consumeToken();
458       if (Tok.isNot(tok::html_quoted_string)) {
459         Diag(Tok.getLocation(),
460              diag::warn_doc_html_start_tag_expected_quoted_string)
461           << SourceRange(Equals.getLocation());
462         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
463                                                        Ident.getHTMLIdent()));
464         while (Tok.is(tok::html_equals) ||
465                Tok.is(tok::html_quoted_string))
466           consumeToken();
467         continue;
468       }
469       Attrs.push_back(HTMLStartTagComment::Attribute(
470                               Ident.getLocation(),
471                               Ident.getHTMLIdent(),
472                               Equals.getLocation(),
473                               SourceRange(Tok.getLocation(),
474                                           Tok.getEndLocation()),
475                               Tok.getHTMLQuotedString()));
476       consumeToken();
477       continue;
478     }
479 
480     case tok::html_greater:
481       S.actOnHTMLStartTagFinish(HST,
482                                 S.copyArray(llvm::makeArrayRef(Attrs)),
483                                 Tok.getLocation(),
484                                 /* IsSelfClosing = */ false);
485       consumeToken();
486       return HST;
487 
488     case tok::html_slash_greater:
489       S.actOnHTMLStartTagFinish(HST,
490                                 S.copyArray(llvm::makeArrayRef(Attrs)),
491                                 Tok.getLocation(),
492                                 /* IsSelfClosing = */ true);
493       consumeToken();
494       return HST;
495 
496     case tok::html_equals:
497     case tok::html_quoted_string:
498       Diag(Tok.getLocation(),
499            diag::warn_doc_html_start_tag_expected_ident_or_greater);
500       while (Tok.is(tok::html_equals) ||
501              Tok.is(tok::html_quoted_string))
502         consumeToken();
503       if (Tok.is(tok::html_ident) ||
504           Tok.is(tok::html_greater) ||
505           Tok.is(tok::html_slash_greater))
506         continue;
507 
508       S.actOnHTMLStartTagFinish(HST,
509                                 S.copyArray(llvm::makeArrayRef(Attrs)),
510                                 SourceLocation(),
511                                 /* IsSelfClosing = */ false);
512       return HST;
513 
514     default:
515       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
516       S.actOnHTMLStartTagFinish(HST,
517                                 S.copyArray(llvm::makeArrayRef(Attrs)),
518                                 SourceLocation(),
519                                 /* IsSelfClosing = */ false);
520       bool StartLineInvalid;
521       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
522                                                   HST->getLocation(),
523                                                   &StartLineInvalid);
524       bool EndLineInvalid;
525       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
526                                                   Tok.getLocation(),
527                                                   &EndLineInvalid);
528       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
529         Diag(Tok.getLocation(),
530              diag::warn_doc_html_start_tag_expected_ident_or_greater)
531           << HST->getSourceRange();
532       else {
533         Diag(Tok.getLocation(),
534              diag::warn_doc_html_start_tag_expected_ident_or_greater);
535         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
536           << HST->getSourceRange();
537       }
538       return HST;
539     }
540   }
541 }
542 
parseHTMLEndTag()543 HTMLEndTagComment *Parser::parseHTMLEndTag() {
544   assert(Tok.is(tok::html_end_tag));
545   Token TokEndTag = Tok;
546   consumeToken();
547   SourceLocation Loc;
548   if (Tok.is(tok::html_greater)) {
549     Loc = Tok.getLocation();
550     consumeToken();
551   }
552 
553   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
554                            Loc,
555                            TokEndTag.getHTMLTagEndName());
556 }
557 
parseParagraphOrBlockCommand()558 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
559   SmallVector<InlineContentComment *, 8> Content;
560 
561   while (true) {
562     switch (Tok.getKind()) {
563     case tok::verbatim_block_begin:
564     case tok::verbatim_line_name:
565     case tok::eof:
566       break; // Block content or EOF ahead, finish this parapgaph.
567 
568     case tok::unknown_command:
569       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
570                                               Tok.getEndLocation(),
571                                               Tok.getUnknownCommandName()));
572       consumeToken();
573       continue;
574 
575     case tok::backslash_command:
576     case tok::at_command: {
577       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
578       if (Info->IsBlockCommand) {
579         if (Content.size() == 0)
580           return parseBlockCommand();
581         break; // Block command ahead, finish this parapgaph.
582       }
583       if (Info->IsVerbatimBlockEndCommand) {
584         Diag(Tok.getLocation(),
585              diag::warn_verbatim_block_end_without_start)
586           << Tok.is(tok::at_command)
587           << Info->Name
588           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
589         consumeToken();
590         continue;
591       }
592       if (Info->IsUnknownCommand) {
593         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
594                                                 Tok.getEndLocation(),
595                                                 Info->getID()));
596         consumeToken();
597         continue;
598       }
599       assert(Info->IsInlineCommand);
600       Content.push_back(parseInlineCommand());
601       continue;
602     }
603 
604     case tok::newline: {
605       consumeToken();
606       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
607         consumeToken();
608         break; // Two newlines -- end of paragraph.
609       }
610       // Also allow [tok::newline, tok::text, tok::newline] if the middle
611       // tok::text is just whitespace.
612       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
613         Token WhitespaceTok = Tok;
614         consumeToken();
615         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
616           consumeToken();
617           break;
618         }
619         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
620         putBack(WhitespaceTok);
621       }
622       if (Content.size() > 0)
623         Content.back()->addTrailingNewline();
624       continue;
625     }
626 
627     // Don't deal with HTML tag soup now.
628     case tok::html_start_tag:
629       Content.push_back(parseHTMLStartTag());
630       continue;
631 
632     case tok::html_end_tag:
633       Content.push_back(parseHTMLEndTag());
634       continue;
635 
636     case tok::text:
637       Content.push_back(S.actOnText(Tok.getLocation(),
638                                     Tok.getEndLocation(),
639                                     Tok.getText()));
640       consumeToken();
641       continue;
642 
643     case tok::verbatim_block_line:
644     case tok::verbatim_block_end:
645     case tok::verbatim_line_text:
646     case tok::html_ident:
647     case tok::html_equals:
648     case tok::html_quoted_string:
649     case tok::html_greater:
650     case tok::html_slash_greater:
651       llvm_unreachable("should not see this token");
652     }
653     break;
654   }
655 
656   return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
657 }
658 
parseVerbatimBlock()659 VerbatimBlockComment *Parser::parseVerbatimBlock() {
660   assert(Tok.is(tok::verbatim_block_begin));
661 
662   VerbatimBlockComment *VB =
663       S.actOnVerbatimBlockStart(Tok.getLocation(),
664                                 Tok.getVerbatimBlockID());
665   consumeToken();
666 
667   // Don't create an empty line if verbatim opening command is followed
668   // by a newline.
669   if (Tok.is(tok::newline))
670     consumeToken();
671 
672   SmallVector<VerbatimBlockLineComment *, 8> Lines;
673   while (Tok.is(tok::verbatim_block_line) ||
674          Tok.is(tok::newline)) {
675     VerbatimBlockLineComment *Line;
676     if (Tok.is(tok::verbatim_block_line)) {
677       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
678                                       Tok.getVerbatimBlockText());
679       consumeToken();
680       if (Tok.is(tok::newline)) {
681         consumeToken();
682       }
683     } else {
684       // Empty line, just a tok::newline.
685       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
686       consumeToken();
687     }
688     Lines.push_back(Line);
689   }
690 
691   if (Tok.is(tok::verbatim_block_end)) {
692     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
693     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
694                                Info->Name,
695                                S.copyArray(llvm::makeArrayRef(Lines)));
696     consumeToken();
697   } else {
698     // Unterminated \\verbatim block
699     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
700                                S.copyArray(llvm::makeArrayRef(Lines)));
701   }
702 
703   return VB;
704 }
705 
parseVerbatimLine()706 VerbatimLineComment *Parser::parseVerbatimLine() {
707   assert(Tok.is(tok::verbatim_line_name));
708 
709   Token NameTok = Tok;
710   consumeToken();
711 
712   SourceLocation TextBegin;
713   StringRef Text;
714   // Next token might not be a tok::verbatim_line_text if verbatim line
715   // starting command comes just before a newline or comment end.
716   if (Tok.is(tok::verbatim_line_text)) {
717     TextBegin = Tok.getLocation();
718     Text = Tok.getVerbatimLineText();
719   } else {
720     TextBegin = NameTok.getEndLocation();
721     Text = "";
722   }
723 
724   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
725                                                 NameTok.getVerbatimLineID(),
726                                                 TextBegin,
727                                                 Text);
728   consumeToken();
729   return VL;
730 }
731 
parseBlockContent()732 BlockContentComment *Parser::parseBlockContent() {
733   switch (Tok.getKind()) {
734   case tok::text:
735   case tok::unknown_command:
736   case tok::backslash_command:
737   case tok::at_command:
738   case tok::html_start_tag:
739   case tok::html_end_tag:
740     return parseParagraphOrBlockCommand();
741 
742   case tok::verbatim_block_begin:
743     return parseVerbatimBlock();
744 
745   case tok::verbatim_line_name:
746     return parseVerbatimLine();
747 
748   case tok::eof:
749   case tok::newline:
750   case tok::verbatim_block_line:
751   case tok::verbatim_block_end:
752   case tok::verbatim_line_text:
753   case tok::html_ident:
754   case tok::html_equals:
755   case tok::html_quoted_string:
756   case tok::html_greater:
757   case tok::html_slash_greater:
758     llvm_unreachable("should not see this token");
759   }
760   llvm_unreachable("bogus token kind");
761 }
762 
parseFullComment()763 FullComment *Parser::parseFullComment() {
764   // Skip newlines at the beginning of the comment.
765   while (Tok.is(tok::newline))
766     consumeToken();
767 
768   SmallVector<BlockContentComment *, 8> Blocks;
769   while (Tok.isNot(tok::eof)) {
770     Blocks.push_back(parseBlockContent());
771 
772     // Skip extra newlines after paragraph end.
773     while (Tok.is(tok::newline))
774       consumeToken();
775   }
776   return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
777 }
778 
779 } // end namespace comments
780 } // end namespace clang
781