1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
printLine(llvm::raw_ostream & OS,const UnwrappedLine & Line,StringRef Prefix="",bool PrintText=false)38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52        << "\"] ";
53     for (SmallVectorImpl<UnwrappedLine>::const_iterator
54              CI = I->Children.begin(),
55              CE = I->Children.end();
56          CI != CE; ++CI) {
57       OS << "\n";
58       printLine(OS, *CI, (Prefix + "  ").str());
59       NewLine = true;
60     }
61   }
62   if (!NewLine)
63     OS << "\n";
64 }
65 
printDebugInfo(const UnwrappedLine & Line)66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67   printLine(llvm::dbgs(), Line);
68 }
69 
70 class ScopedDeclarationState {
71 public:
ScopedDeclarationState(UnwrappedLine & Line,llvm::BitVector & Stack,bool MustBeDeclaration)72   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73                          bool MustBeDeclaration)
74       : Line(Line), Stack(Stack) {
75     Line.MustBeDeclaration = MustBeDeclaration;
76     Stack.push_back(MustBeDeclaration);
77   }
~ScopedDeclarationState()78   ~ScopedDeclarationState() {
79     Stack.pop_back();
80     if (!Stack.empty())
81       Line.MustBeDeclaration = Stack.back();
82     else
83       Line.MustBeDeclaration = true;
84   }
85 
86 private:
87   UnwrappedLine &Line;
88   llvm::BitVector &Stack;
89 };
90 
91 } // end anonymous namespace
92 
93 class ScopedLineState {
94 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)95   ScopedLineState(UnwrappedLineParser &Parser,
96                   bool SwitchToPreprocessorLines = false)
97       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98     if (SwitchToPreprocessorLines)
99       Parser.CurrentLines = &Parser.PreprocessorDirectives;
100     else if (!Parser.Line->Tokens.empty())
101       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102     PreBlockLine = std::move(Parser.Line);
103     Parser.Line = std::make_unique<UnwrappedLine>();
104     Parser.Line->Level = PreBlockLine->Level;
105     Parser.Line->PPLevel = PreBlockLine->PPLevel;
106     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
108   }
109 
~ScopedLineState()110   ~ScopedLineState() {
111     if (!Parser.Line->Tokens.empty())
112       Parser.addUnwrappedLine();
113     assert(Parser.Line->Tokens.empty());
114     Parser.Line = std::move(PreBlockLine);
115     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116       Parser.MustBreakBeforeNextToken = true;
117     Parser.CurrentLines = OriginalLines;
118   }
119 
120 private:
121   UnwrappedLineParser &Parser;
122 
123   std::unique_ptr<UnwrappedLine> PreBlockLine;
124   SmallVectorImpl<UnwrappedLine> *OriginalLines;
125 };
126 
127 class CompoundStatementIndenter {
128 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)129   CompoundStatementIndenter(UnwrappedLineParser *Parser,
130                             const FormatStyle &Style, unsigned &LineLevel)
131       : CompoundStatementIndenter(Parser, LineLevel,
132                                   Style.BraceWrapping.AfterControlStatement,
133                                   Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)134   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135                             bool WrapBrace, bool IndentBrace)
136       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137     if (WrapBrace)
138       Parser->addUnwrappedLine();
139     if (IndentBrace)
140       ++LineLevel;
141   }
~CompoundStatementIndenter()142   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
143 
144 private:
145   unsigned &LineLevel;
146   unsigned OldLineLevel;
147 };
148 
/// Constructs a parser over \p Tokens that hands the unwrapped lines it
/// produces to \p Callback.
///
/// The parser starts with an empty current line and collects lines into its
/// top-level \c Lines container. The \c IncludeGuard state starts out as
/// \c IG_Rejected when \c Style.IndentPPDirectives is \c PPDIS_None and as
/// \c IG_Inited otherwise. \p SourceMgr, \p Allocator and \p IdentTable are
/// only forwarded to the \c Macros member here.
///
/// \param FirstStartColumn Stored and later copied into the first start
/// column of the current line by reset() and parse().
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
164 
reset()165 void UnwrappedLineParser::reset() {
166   PPBranchLevel = -1;
167   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168                      ? IG_Rejected
169                      : IG_Inited;
170   IncludeGuardToken = nullptr;
171   Line.reset(new UnwrappedLine);
172   CommentsBeforeNextToken.clear();
173   FormatTok = nullptr;
174   MustBreakBeforeNextToken = false;
175   IsDecltypeAutoFunction = false;
176   PreprocessorDirectives.clear();
177   CurrentLines = &Lines;
178   DeclarationScopeStack.clear();
179   NestedTooDeep.clear();
180   NestedLambdas.clear();
181   PPStack.clear();
182   Line->FirstStartColumn = FirstStartColumn;
183 
184   if (!Unexpanded.empty())
185     for (FormatToken *Token : AllTokens)
186       Token->MacroCtx.reset();
187   CurrentExpandedLines.clear();
188   ExpandedLines.clear();
189   Unexpanded.clear();
190   InExpansion = false;
191   Reconstruct.reset();
192 }
193 
parse()194 void UnwrappedLineParser::parse() {
195   IndexedTokenSource TokenSource(AllTokens);
196   Line->FirstStartColumn = FirstStartColumn;
197   do {
198     LLVM_DEBUG(llvm::dbgs() << "----\n");
199     reset();
200     Tokens = &TokenSource;
201     TokenSource.reset();
202 
203     readToken();
204     parseFile();
205 
206     // If we found an include guard then all preprocessor directives (other than
207     // the guard) are over-indented by one.
208     if (IncludeGuard == IG_Found) {
209       for (auto &Line : Lines)
210         if (Line.InPPDirective && Line.Level > 0)
211           --Line.Level;
212     }
213 
214     // Create line with eof token.
215     assert(eof());
216     pushToken(FormatTok);
217     addUnwrappedLine();
218 
219     // In a first run, format everything with the lines containing macro calls
220     // replaced by the expansion.
221     if (!ExpandedLines.empty()) {
222       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223       for (const auto &Line : Lines) {
224         if (!Line.Tokens.empty()) {
225           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226           if (it != ExpandedLines.end()) {
227             for (const auto &Expanded : it->second) {
228               LLVM_DEBUG(printDebugInfo(Expanded));
229               Callback.consumeUnwrappedLine(Expanded);
230             }
231             continue;
232           }
233         }
234         LLVM_DEBUG(printDebugInfo(Line));
235         Callback.consumeUnwrappedLine(Line);
236       }
237       Callback.finishRun();
238     }
239 
240     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241     for (const UnwrappedLine &Line : Lines) {
242       LLVM_DEBUG(printDebugInfo(Line));
243       Callback.consumeUnwrappedLine(Line);
244     }
245     Callback.finishRun();
246     Lines.clear();
247     while (!PPLevelBranchIndex.empty() &&
248            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251     }
252     if (!PPLevelBranchIndex.empty()) {
253       ++PPLevelBranchIndex.back();
254       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256     }
257   } while (!PPLevelBranchIndex.empty());
258 }
259 
parseFile()260 void UnwrappedLineParser::parseFile() {
261   // The top-level context in a file always has declarations, except for pre-
262   // processor directives and JavaScript files.
263   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265                                           MustBeDeclaration);
266   if (Style.Language == FormatStyle::LK_TextProto)
267     parseBracedList();
268   else
269     parseLevel();
270   // Make sure to format the remaining tokens.
271   //
272   // LK_TextProto is special since its top-level is parsed as the body of a
273   // braced list, which does not necessarily have natural line separators such
274   // as a semicolon. Comments after the last entry that have been determined to
275   // not belong to that line, as in:
276   //   key: value
277   //   // endfile comment
278   // do not have a chance to be put on a line of their own until this point.
279   // Here we add this newline before end-of-file comments.
280   if (Style.Language == FormatStyle::LK_TextProto &&
281       !CommentsBeforeNextToken.empty()) {
282     addUnwrappedLine();
283   }
284   flushComments(true);
285   addUnwrappedLine();
286 }
287 
parseCSharpGenericTypeConstraint()288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289   do {
290     switch (FormatTok->Tok.getKind()) {
291     case tok::l_brace:
292       return;
293     default:
294       if (FormatTok->is(Keywords.kw_where)) {
295         addUnwrappedLine();
296         nextToken();
297         parseCSharpGenericTypeConstraint();
298         break;
299       }
300       nextToken();
301       break;
302     }
303   } while (!eof());
304 }
305 
parseCSharpAttribute()306 void UnwrappedLineParser::parseCSharpAttribute() {
307   int UnpairedSquareBrackets = 1;
308   do {
309     switch (FormatTok->Tok.getKind()) {
310     case tok::r_square:
311       nextToken();
312       --UnpairedSquareBrackets;
313       if (UnpairedSquareBrackets == 0) {
314         addUnwrappedLine();
315         return;
316       }
317       break;
318     case tok::l_square:
319       ++UnpairedSquareBrackets;
320       nextToken();
321       break;
322     default:
323       nextToken();
324       break;
325     }
326   } while (!eof());
327 }
328 
precededByCommentOrPPDirective() const329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330   if (!Lines.empty() && Lines.back().InPPDirective)
331     return true;
332 
333   const FormatToken *Previous = Tokens->getPreviousToken();
334   return Previous && Previous->is(tok::comment) &&
335          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
336 }
337 
/// \brief Parses a level, that is, the run of structural elements starting at
/// the current token and ending at the closing brace that belongs to
/// \p OpeningBrace or, failing that, at the end of the input. Without an
/// opening brace, stray closing braces are consumed and parsing continues.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Sampled once on entry. It is only consulted in the r_brace case below,
  // which bails out earlier when RemoveBracesLLVM is off.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute tokens are consumed without being counted as statements.
    if (FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once HasDoWhile or
    // HasLabel has been set, nullptr is passed in its place for later
    // elements.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment becomes an unwrapped line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Outside requires expressions, a real '{' is first tried as a braced
      // list; only if that fails is it parsed as a block.
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This closes the level. The token is left for the caller; what
        // remains is to decide whether the level was a "simple block".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // A simple block ends in a real '}', contains exactly one statement,
        // has no label or do-while, and is not preceded by a comment or
        // preprocessor directive (checked on entry and again here).
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the '}' also disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // No opening brace for this level: consume the stray '}' and put it on
      // a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past any comments to the token after 'default', then
      // rewind the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // The level is raised at most once per parseLevel() call, when the
      // first label is seen, and only if case labels are indented or the
      // line is a preprocessor directive at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // The '[' is consumed here; parseCSharpAttribute() consumes the rest.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  // Reached the end of the input without finding a closing brace.
  return false;
}
474 
/// Scans ahead from the current '{' and assigns a block kind (\c BK_Block or
/// \c BK_BracedInit) to the braces it encounters, based on the tokens around
/// each closing brace. The scan stops when the brace stack becomes empty or
/// at the end of the input; the token source is then rewound to the position
/// it had on entry.
/// \param ExpectClassBody When true, a ';' directly after the '}' that closes
/// the outermost brace does not by itself mark that pair as a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    // NextTok is the first non-comment token after Tok.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    if (!Line->InMacroBody) {
      // Skip PPDirective lines and comments.
      while (NextTok->is(tok::hash)) {
        // Advance to the first token that starts a new line (or to eof).
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces that are still undecided are classified here.
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          // JavaScript: '}' followed by 'of', 'in' or 'as'.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          // C++: '}' followed by '('.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, period, colon, right paren, right square
          // bracket or ellipsis after the closing brace, we assume this is a
          // braced initializer list. (A following semicolon is handled
          // separately below.)
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          // '}' followed by an identifier, unless the token before the '}' is
          // ';', '}' or '{'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // '}' followed by ';', except for the outermost brace pair when a
          // class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this overrides the result computed so far and
            // advances NextTok past the first '['.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (Style.isCpp() && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        // Opening and closing brace always receive the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros count as evidence for a block.
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Seeing one of these inside a still-undecided brace marks that brace
      // as a block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  // Rewind so that parsing resumes at the token we started from.
  FormatTok = Tokens->setPosition(StoredPosition);
}
651 
652 // Sets the token type of the directly previous right brace.
setPreviousRBraceType(TokenType Type)653 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
654   if (auto Prev = FormatTok->getPreviousNonComment();
655       Prev && Prev->is(tok::r_brace)) {
656     Prev->setFinalizedType(Type);
657   }
658 }
659 
// Folds the std::hash of \p v into the running hash \p seed. The result
// depends on the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mixed =
      ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
665 
computePPHash() const666 size_t UnwrappedLineParser::computePPHash() const {
667   size_t h = 0;
668   for (const auto &i : PPStack) {
669     hash_combine(h, size_t(i.Kind));
670     hash_combine(h, i.Line);
671   }
672   return h;
673 }
674 
675 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
676 // is not null, subtracts its length (plus the preceding space) when computing
677 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
678 // running the token annotator on it so that we can restore them afterward.
mightFitOnOneLine(UnwrappedLine & ParsedLine,const FormatToken * OpeningBrace) const679 bool UnwrappedLineParser::mightFitOnOneLine(
680     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
681   const auto ColumnLimit = Style.ColumnLimit;
682   if (ColumnLimit == 0)
683     return true;
684 
685   auto &Tokens = ParsedLine.Tokens;
686   assert(!Tokens.empty());
687 
688   const auto *LastToken = Tokens.back().Tok;
689   assert(LastToken);
690 
691   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
692 
693   int Index = 0;
694   for (const auto &Token : Tokens) {
695     assert(Token.Tok);
696     auto &SavedToken = SavedTokens[Index++];
697     SavedToken.Tok = new FormatToken;
698     SavedToken.Tok->copyFrom(*Token.Tok);
699     SavedToken.Children = std::move(Token.Children);
700   }
701 
702   AnnotatedLine Line(ParsedLine);
703   assert(Line.Last == LastToken);
704 
705   TokenAnnotator Annotator(Style, Keywords);
706   Annotator.annotate(Line);
707   Annotator.calculateFormattingInformation(Line);
708 
709   auto Length = LastToken->TotalLength;
710   if (OpeningBrace) {
711     assert(OpeningBrace != Tokens.front().Tok);
712     if (auto Prev = OpeningBrace->Previous;
713         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
714       Length -= ColumnLimit;
715     }
716     Length -= OpeningBrace->TokenText.size() + 1;
717   }
718 
719   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
720     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
721     Length -= FirstToken->TokenText.size() + 1;
722   }
723 
724   Index = 0;
725   for (auto &Token : Tokens) {
726     const auto &SavedToken = SavedTokens[Index++];
727     Token.Tok->copyFrom(*SavedToken.Tok);
728     Token.Children = std::move(SavedToken.Children);
729     delete SavedToken.Tok;
730   }
731 
732   // If these change PPLevel needs to be used for get correct indentation.
733   assert(!Line.InMacroBody);
734   assert(!Line.InPPDirective);
735   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
736 }
737 
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool KeepBraces,IfStmtKind * IfKind,bool UnindentWhitesmithsBraces)738 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
739                                              unsigned AddLevels, bool MunchSemi,
740                                              bool KeepBraces,
741                                              IfStmtKind *IfKind,
742                                              bool UnindentWhitesmithsBraces) {
743   auto HandleVerilogBlockLabel = [this]() {
744     // ":" name
745     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
746       nextToken();
747       if (Keywords.isVerilogIdentifier(*FormatTok))
748         nextToken();
749     }
750   };
751 
752   // Whether this is a Verilog-specific block that has a special header like a
753   // module.
754   const bool VerilogHierarchy =
755       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
756   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
757           (Style.isVerilog() &&
758            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
759          "'{' or macro block token expected");
760   FormatToken *Tok = FormatTok;
761   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
762   auto Index = CurrentLines->size();
763   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
764   FormatTok->setBlockKind(BK_Block);
765 
766   // For Whitesmiths mode, jump to the next level prior to skipping over the
767   // braces.
768   if (!VerilogHierarchy && AddLevels > 0 &&
769       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
770     ++Line->Level;
771   }
772 
773   size_t PPStartHash = computePPHash();
774 
775   const unsigned InitialLevel = Line->Level;
776   if (VerilogHierarchy) {
777     AddLevels += parseVerilogHierarchyHeader();
778   } else {
779     nextToken(/*LevelDifference=*/AddLevels);
780     HandleVerilogBlockLabel();
781   }
782 
783   // Bail out if there are too many levels. Otherwise, the stack might overflow.
784   if (Line->Level > 300)
785     return nullptr;
786 
787   if (MacroBlock && FormatTok->is(tok::l_paren))
788     parseParens();
789 
790   size_t NbPreprocessorDirectives =
791       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
792   addUnwrappedLine();
793   size_t OpeningLineIndex =
794       CurrentLines->empty()
795           ? (UnwrappedLine::kInvalidIndex)
796           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
797 
798   // Whitesmiths is weird here. The brace needs to be indented for the namespace
799   // block, but the block itself may not be indented depending on the style
800   // settings. This allows the format to back up one level in those cases.
801   if (UnindentWhitesmithsBraces)
802     --Line->Level;
803 
804   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
805                                           MustBeDeclaration);
806   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
807     Line->Level += AddLevels;
808 
809   FormatToken *IfLBrace = nullptr;
810   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
811 
812   if (eof())
813     return IfLBrace;
814 
815   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
816                  : FormatTok->isNot(tok::r_brace)) {
817     Line->Level = InitialLevel;
818     FormatTok->setBlockKind(BK_Block);
819     return IfLBrace;
820   }
821 
822   if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
823     FormatTok->setFinalizedType(TT_NamespaceRBrace);
824 
825   const bool IsFunctionRBrace =
826       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
827 
828   auto RemoveBraces = [=]() mutable {
829     if (!SimpleBlock)
830       return false;
831     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
832     assert(FormatTok->is(tok::r_brace));
833     const bool WrappedOpeningBrace = !Tok->Previous;
834     if (WrappedOpeningBrace && FollowedByComment)
835       return false;
836     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
837     if (KeepBraces && !HasRequiredIfBraces)
838       return false;
839     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
840       const FormatToken *Previous = Tokens->getPreviousToken();
841       assert(Previous);
842       if (Previous->is(tok::r_brace) && !Previous->Optional)
843         return false;
844     }
845     assert(!CurrentLines->empty());
846     auto &LastLine = CurrentLines->back();
847     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
848       return false;
849     if (Tok->is(TT_ElseLBrace))
850       return true;
851     if (WrappedOpeningBrace) {
852       assert(Index > 0);
853       --Index; // The line above the wrapped l_brace.
854       Tok = nullptr;
855     }
856     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
857   };
858   if (RemoveBraces()) {
859     Tok->MatchingParen = FormatTok;
860     FormatTok->MatchingParen = Tok;
861   }
862 
863   size_t PPEndHash = computePPHash();
864 
865   // Munch the closing brace.
866   nextToken(/*LevelDifference=*/-AddLevels);
867 
868   // When this is a function block and there is an unnecessary semicolon
869   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
870   // it later).
871   if (Style.RemoveSemicolon && IsFunctionRBrace) {
872     while (FormatTok->is(tok::semi)) {
873       FormatTok->Optional = true;
874       nextToken();
875     }
876   }
877 
878   HandleVerilogBlockLabel();
879 
880   if (MacroBlock && FormatTok->is(tok::l_paren))
881     parseParens();
882 
883   Line->Level = InitialLevel;
884 
885   if (FormatTok->is(tok::kw_noexcept)) {
886     // A noexcept in a requires expression.
887     nextToken();
888   }
889 
890   if (FormatTok->is(tok::arrow)) {
891     // Following the } or noexcept we can find a trailing return type arrow
892     // as part of an implicit conversion constraint.
893     nextToken();
894     parseStructuralElement();
895   }
896 
897   if (MunchSemi && FormatTok->is(tok::semi))
898     nextToken();
899 
900   if (PPStartHash == PPEndHash) {
901     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
902     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
903       // Update the opening line to add the forward reference as well
904       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
905           CurrentLines->size() - 1;
906     }
907   }
908 
909   return IfLBrace;
910 }
911 
isGoogScope(const UnwrappedLine & Line)912 static bool isGoogScope(const UnwrappedLine &Line) {
913   // FIXME: Closure-library specific stuff should not be hard-coded but be
914   // configurable.
915   if (Line.Tokens.size() < 4)
916     return false;
917   auto I = Line.Tokens.begin();
918   if (I->Tok->TokenText != "goog")
919     return false;
920   ++I;
921   if (I->Tok->isNot(tok::period))
922     return false;
923   ++I;
924   if (I->Tok->TokenText != "scope")
925     return false;
926   ++I;
927   return I->Tok->is(tok::l_paren);
928 }
929 
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)930 static bool isIIFE(const UnwrappedLine &Line,
931                    const AdditionalKeywords &Keywords) {
932   // Look for the start of an immediately invoked anonymous function.
933   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
934   // This is commonly done in JavaScript to create a new, anonymous scope.
935   // Example: (function() { ... })()
936   if (Line.Tokens.size() < 3)
937     return false;
938   auto I = Line.Tokens.begin();
939   if (I->Tok->isNot(tok::l_paren))
940     return false;
941   ++I;
942   if (I->Tok->isNot(Keywords.kw_function))
943     return false;
944   ++I;
945   return I->Tok->is(tok::l_paren);
946 }
947 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)948 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
949                                    const FormatToken &InitialToken) {
950   tok::TokenKind Kind = InitialToken.Tok.getKind();
951   if (InitialToken.is(TT_NamespaceMacro))
952     Kind = tok::kw_namespace;
953 
954   switch (Kind) {
955   case tok::kw_namespace:
956     return Style.BraceWrapping.AfterNamespace;
957   case tok::kw_class:
958     return Style.BraceWrapping.AfterClass;
959   case tok::kw_union:
960     return Style.BraceWrapping.AfterUnion;
961   case tok::kw_struct:
962     return Style.BraceWrapping.AfterStruct;
963   case tok::kw_enum:
964     return Style.BraceWrapping.AfterEnum;
965   default:
966     return false;
967   }
968 }
969 
parseChildBlock()970 void UnwrappedLineParser::parseChildBlock() {
971   assert(FormatTok->is(tok::l_brace));
972   FormatTok->setBlockKind(BK_Block);
973   const FormatToken *OpeningBrace = FormatTok;
974   nextToken();
975   {
976     bool SkipIndent = (Style.isJavaScript() &&
977                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
978     ScopedLineState LineState(*this);
979     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
980                                             /*MustBeDeclaration=*/false);
981     Line->Level += SkipIndent ? 0 : 1;
982     parseLevel(OpeningBrace);
983     flushComments(isOnNewLine(*FormatTok));
984     Line->Level -= SkipIndent ? 0 : 1;
985   }
986   nextToken();
987 }
988 
parsePPDirective()989 void UnwrappedLineParser::parsePPDirective() {
990   assert(FormatTok->is(tok::hash) && "'#' expected");
991   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
992 
993   nextToken();
994 
995   if (!FormatTok->Tok.getIdentifierInfo()) {
996     parsePPUnknown();
997     return;
998   }
999 
1000   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1001   case tok::pp_define:
1002     parsePPDefine();
1003     return;
1004   case tok::pp_if:
1005     parsePPIf(/*IfDef=*/false);
1006     break;
1007   case tok::pp_ifdef:
1008   case tok::pp_ifndef:
1009     parsePPIf(/*IfDef=*/true);
1010     break;
1011   case tok::pp_else:
1012   case tok::pp_elifdef:
1013   case tok::pp_elifndef:
1014   case tok::pp_elif:
1015     parsePPElse();
1016     break;
1017   case tok::pp_endif:
1018     parsePPEndIf();
1019     break;
1020   case tok::pp_pragma:
1021     parsePPPragma();
1022     break;
1023   default:
1024     parsePPUnknown();
1025     break;
1026   }
1027 }
1028 
conditionalCompilationCondition(bool Unreachable)1029 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1030   size_t Line = CurrentLines->size();
1031   if (CurrentLines == &PreprocessorDirectives)
1032     Line += Lines.size();
1033 
1034   if (Unreachable ||
1035       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1036     PPStack.push_back({PP_Unreachable, Line});
1037   } else {
1038     PPStack.push_back({PP_Conditional, Line});
1039   }
1040 }
1041 
conditionalCompilationStart(bool Unreachable)1042 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1043   ++PPBranchLevel;
1044   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1045   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1046     PPLevelBranchIndex.push_back(0);
1047     PPLevelBranchCount.push_back(0);
1048   }
1049   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1050   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1051   conditionalCompilationCondition(Unreachable || Skip);
1052 }
1053 
conditionalCompilationAlternative()1054 void UnwrappedLineParser::conditionalCompilationAlternative() {
1055   if (!PPStack.empty())
1056     PPStack.pop_back();
1057   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1058   if (!PPChainBranchIndex.empty())
1059     ++PPChainBranchIndex.top();
1060   conditionalCompilationCondition(
1061       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1062       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1063 }
1064 
conditionalCompilationEnd()1065 void UnwrappedLineParser::conditionalCompilationEnd() {
1066   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1067   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1068     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1069       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1070   }
1071   // Guard against #endif's without #if.
1072   if (PPBranchLevel > -1)
1073     --PPBranchLevel;
1074   if (!PPChainBranchIndex.empty())
1075     PPChainBranchIndex.pop();
1076   if (!PPStack.empty())
1077     PPStack.pop_back();
1078 }
1079 
parsePPIf(bool IfDef)1080 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1081   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1082   nextToken();
1083   bool Unreachable = false;
1084   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1085     Unreachable = true;
1086   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1087     Unreachable = true;
1088   conditionalCompilationStart(Unreachable);
1089   FormatToken *IfCondition = FormatTok;
1090   // If there's a #ifndef on the first line, and the only lines before it are
1091   // comments, it could be an include guard.
1092   bool MaybeIncludeGuard = IfNDef;
1093   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1094     for (auto &Line : Lines) {
1095       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1096         MaybeIncludeGuard = false;
1097         IncludeGuard = IG_Rejected;
1098         break;
1099       }
1100     }
1101   }
1102   --PPBranchLevel;
1103   parsePPUnknown();
1104   ++PPBranchLevel;
1105   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106     IncludeGuard = IG_IfNdefed;
1107     IncludeGuardToken = IfCondition;
1108   }
1109 }
1110 
parsePPElse()1111 void UnwrappedLineParser::parsePPElse() {
1112   // If a potential include guard has an #else, it's not an include guard.
1113   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1114     IncludeGuard = IG_Rejected;
1115   // Don't crash when there is an #else without an #if.
1116   assert(PPBranchLevel >= -1);
1117   if (PPBranchLevel == -1)
1118     conditionalCompilationStart(/*Unreachable=*/true);
1119   conditionalCompilationAlternative();
1120   --PPBranchLevel;
1121   parsePPUnknown();
1122   ++PPBranchLevel;
1123 }
1124 
parsePPEndIf()1125 void UnwrappedLineParser::parsePPEndIf() {
1126   conditionalCompilationEnd();
1127   parsePPUnknown();
1128   // If the #endif of a potential include guard is the last thing in the file,
1129   // then we found an include guard.
1130   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1131       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1132     IncludeGuard = IG_Found;
1133   }
1134 }
1135 
// Parses a #define directive. The `#define NAME(params)` part becomes its own
// unwrapped line; the macro body is then either skipped (with
// Style.SkipMacroDefinitionBody) or parsed like a file via parseFile(). Along
// the way the include guard state machine is advanced or rejected.
void UnwrappedLineParser::parsePPDefine() {
  nextToken();

  // Without an identifier after `define` this cannot be an include guard;
  // treat the rest of the directive as unknown.
  if (!FormatTok->Tok.getIdentifierInfo()) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  // The macro name matches the one of the preceding candidate #ifndef: accept
  // it as a guard definition unless some earlier line starts with anything
  // other than a comment or a '#'.
  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  }

  // In the context of a define, even keywords should be treated as normal
  // identifiers. Setting the kind to identifier is not enough, because we need
  // to treat additional keywords like __except as well, which are already
  // identifiers. Setting the identifier info to null interferes with include
  // guard processing above, and changes preprocessing nesting.
  FormatTok->Tok.setKind(tok::identifier);
  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
  nextToken();
  // A '(' that directly follows the name (no whitespace) is parsed as part of
  // the `#define` line, before that line is emitted below.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      !FormatTok->hasWhitespaceBefore()) {
    parseParens();
  }
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    Line->Level += PPBranchLevel + 1;
  // Emit the `#define NAME(...)` part; the body goes one level deeper.
  addUnwrappedLine();
  ++Line->Level;

  // The guard's own #ifndef level is not counted when the macro being defined
  // is the include guard.
  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
  assert((int)Line->PPLevel >= 0);
  Line->InMacroBody = true;

  if (Style.SkipMacroDefinitionBody) {
    // Mark every remaining token of the directive as finalized.
    // NOTE(review): the do-while body runs at least once, so the current token
    // is marked Finalized even if eof() is already true here (e.g. a macro
    // with an empty body) - confirm this is intended.
    do {
      FormatTok->Finalized = true;
      nextToken();
    } while (!eof());
    addUnwrappedLine();
    return;
  }

  // Consume a leading `identifier :` pair before parsing the body.
  // NOTE(review): presumably this keeps the pair from being parsed as a label
  // by parseFile() - confirm.
  if (FormatTok->is(tok::identifier) &&
      Tokens->peekNextToken()->is(tok::colon)) {
    nextToken();
    nextToken();
  }

  // Errors during a preprocessor directive can only affect the layout of the
  // preprocessor directive, and thus we ignore them. An alternative approach
  // would be to use the same approach we use on the file level (no
  // re-indentation if there was a structural error) within the macro
  // definition.
  parseFile();
}
1201 
// Parses a #pragma directive: flags the current line as a pragma directive
// and then consumes the rest of the directive like any unknown one.
void UnwrappedLineParser::parsePPPragma() {
  Line->InPragmaDirective = true;
  parsePPUnknown();
}
1206 
parsePPUnknown()1207 void UnwrappedLineParser::parsePPUnknown() {
1208   do {
1209     nextToken();
1210   } while (!eof());
1211   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1212     Line->Level += PPBranchLevel + 1;
1213   addUnwrappedLine();
1214 }
1215 
1216 // Here we exclude certain tokens that are not usually the first token in an
1217 // unwrapped line. This is used in attempt to distinguish macro calls without
1218 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)1219 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1220   // Semicolon can be a null-statement, l_square can be a start of a macro or
1221   // a C++11 attribute, but this doesn't seem to be common.
1222   assert(Tok.isNot(TT_AttributeSquare));
1223   return !Tok.isOneOf(tok::semi, tok::l_brace,
1224                       // Tokens that can only be used as binary operators and a
1225                       // part of overloaded operator names.
1226                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1227                       tok::less, tok::greater, tok::slash, tok::percent,
1228                       tok::lessless, tok::greatergreater, tok::equal,
1229                       tok::plusequal, tok::minusequal, tok::starequal,
1230                       tok::slashequal, tok::percentequal, tok::ampequal,
1231                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1232                       tok::lesslessequal,
1233                       // Colon is used in labels, base class lists, initializer
1234                       // lists, range-based for loops, ternary operator, but
1235                       // should never be the first token in an unwrapped line.
1236                       tok::colon,
1237                       // 'noexcept' is a trailing annotation.
1238                       tok::kw_noexcept);
1239 }
1240 
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1241 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1242                           const FormatToken *FormatTok) {
1243   // FIXME: This returns true for C/C++ keywords like 'struct'.
1244   return FormatTok->is(tok::identifier) &&
1245          (!FormatTok->Tok.getIdentifierInfo() ||
1246           !FormatTok->isOneOf(
1247               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1248               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1249               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1250               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1251               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1252               Keywords.kw_instanceof, Keywords.kw_interface,
1253               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1254 }
1255 
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1256 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1257                                  const FormatToken *FormatTok) {
1258   return FormatTok->Tok.isLiteral() ||
1259          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1260          mustBeJSIdent(Keywords, FormatTok);
1261 }
1262 
1263 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1264 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1265 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1266                            const FormatToken *FormatTok) {
1267   return FormatTok->isOneOf(
1268       tok::kw_return, Keywords.kw_yield,
1269       // conditionals
1270       tok::kw_if, tok::kw_else,
1271       // loops
1272       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1273       // switch/case
1274       tok::kw_switch, tok::kw_case,
1275       // exceptions
1276       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1277       // declaration
1278       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1279       Keywords.kw_async, Keywords.kw_function,
1280       // import/export
1281       Keywords.kw_import, tok::kw_export);
1282 }
1283 
1284 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)1285 static bool isC78Type(const FormatToken &Tok) {
1286   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1287                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1288                      tok::identifier);
1289 }
1290 
1291 // This function checks whether a token starts the first parameter declaration
1292 // in a K&R C (aka C78) function definition, e.g.:
1293 //   int f(a, b)
1294 //   short a, b;
1295 //   {
1296 //      return a + b;
1297 //   }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1298 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1299                                const FormatToken *FuncName) {
1300   assert(Tok);
1301   assert(Next);
1302   assert(FuncName);
1303 
1304   if (FuncName->isNot(tok::identifier))
1305     return false;
1306 
1307   const FormatToken *Prev = FuncName->Previous;
1308   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1309     return false;
1310 
1311   if (!isC78Type(*Tok) &&
1312       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1313     return false;
1314   }
1315 
1316   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1317     return false;
1318 
1319   Tok = Tok->Previous;
1320   if (!Tok || Tok->isNot(tok::r_paren))
1321     return false;
1322 
1323   Tok = Tok->Previous;
1324   if (!Tok || Tok->isNot(tok::identifier))
1325     return false;
1326 
1327   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1328 }
1329 
parseModuleImport()1330 bool UnwrappedLineParser::parseModuleImport() {
1331   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1332 
1333   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1334       !Token->Tok.getIdentifierInfo() &&
1335       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1336     return false;
1337   }
1338 
1339   nextToken();
1340   while (!eof()) {
1341     if (FormatTok->is(tok::colon)) {
1342       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1343     }
1344     // Handle import <foo/bar.h> as we would an include statement.
1345     else if (FormatTok->is(tok::less)) {
1346       nextToken();
1347       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1348         // Mark tokens up to the trailing line comments as implicit string
1349         // literals.
1350         if (FormatTok->isNot(tok::comment) &&
1351             !FormatTok->TokenText.starts_with("//")) {
1352           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1353         }
1354         nextToken();
1355       }
1356     }
1357     if (FormatTok->is(tok::semi)) {
1358       nextToken();
1359       break;
1360     }
1361     nextToken();
1362   }
1363 
1364   addUnwrappedLine();
1365   return true;
1366 }
1367 
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI is only considered across a line break. If comments precede the next
  // token, the first of them decides whether there is a break.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
  // A value on the next line that follows a value, ']', ')', '++' or '--'
  // ends the line, unless either side is a template string piece that opens
  // ("${") or closes ("}") an embedded expression.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A declaration or statement keyword after a value or ')' ends the line.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}
1415 
parseStructuralElement(const FormatToken * OpeningBrace,IfStmtKind * IfKind,FormatToken ** IfLeftBrace,bool * HasDoWhile,bool * HasLabel)1416 void UnwrappedLineParser::parseStructuralElement(
1417     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1418     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1419   if (Style.Language == FormatStyle::LK_TableGen &&
1420       FormatTok->is(tok::pp_include)) {
1421     nextToken();
1422     if (FormatTok->is(tok::string_literal))
1423       nextToken();
1424     addUnwrappedLine();
1425     return;
1426   }
1427 
1428   if (Style.isCpp()) {
1429     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1430     }
1431   } else if (Style.isVerilog()) {
1432     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1433       parseForOrWhileLoop(/*HasParens=*/false);
1434       return;
1435     }
1436     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1437       parseForOrWhileLoop();
1438       return;
1439     }
1440     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1441                            Keywords.kw_assume, Keywords.kw_cover)) {
1442       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1443       return;
1444     }
1445 
1446     // Skip things that can exist before keywords like 'if' and 'case'.
1447     while (true) {
1448       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1449                              Keywords.kw_unique0)) {
1450         nextToken();
1451       } else if (FormatTok->is(tok::l_paren) &&
1452                  Tokens->peekNextToken()->is(tok::star)) {
1453         parseParens();
1454       } else {
1455         break;
1456       }
1457     }
1458   }
1459 
1460   // Tokens that only make sense at the beginning of a line.
1461   switch (FormatTok->Tok.getKind()) {
1462   case tok::kw_asm:
1463     nextToken();
1464     if (FormatTok->is(tok::l_brace)) {
1465       FormatTok->setFinalizedType(TT_InlineASMBrace);
1466       nextToken();
1467       while (FormatTok && !eof()) {
1468         if (FormatTok->is(tok::r_brace)) {
1469           FormatTok->setFinalizedType(TT_InlineASMBrace);
1470           nextToken();
1471           addUnwrappedLine();
1472           break;
1473         }
1474         FormatTok->Finalized = true;
1475         nextToken();
1476       }
1477     }
1478     break;
1479   case tok::kw_namespace:
1480     parseNamespace();
1481     return;
1482   case tok::kw_public:
1483   case tok::kw_protected:
1484   case tok::kw_private:
1485     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1486         Style.isCSharp()) {
1487       nextToken();
1488     } else {
1489       parseAccessSpecifier();
1490     }
1491     return;
1492   case tok::kw_if: {
1493     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1494       // field/method declaration.
1495       break;
1496     }
1497     FormatToken *Tok = parseIfThenElse(IfKind);
1498     if (IfLeftBrace)
1499       *IfLeftBrace = Tok;
1500     return;
1501   }
1502   case tok::kw_for:
1503   case tok::kw_while:
1504     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1505       // field/method declaration.
1506       break;
1507     }
1508     parseForOrWhileLoop();
1509     return;
1510   case tok::kw_do:
1511     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512       // field/method declaration.
1513       break;
1514     }
1515     parseDoWhile();
1516     if (HasDoWhile)
1517       *HasDoWhile = true;
1518     return;
1519   case tok::kw_switch:
1520     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1521       // 'switch: string' field declaration.
1522       break;
1523     }
1524     parseSwitch();
1525     return;
1526   case tok::kw_default:
1527     // In Verilog default along with other labels are handled in the next loop.
1528     if (Style.isVerilog())
1529       break;
1530     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1531       // 'default: string' field declaration.
1532       break;
1533     }
1534     nextToken();
1535     if (FormatTok->is(tok::colon)) {
1536       FormatTok->setFinalizedType(TT_CaseLabelColon);
1537       parseLabel();
1538       return;
1539     }
1540     // e.g. "default void f() {}" in a Java interface.
1541     break;
1542   case tok::kw_case:
1543     // Proto: there are no switch/case statements.
1544     if (Style.Language == FormatStyle::LK_Proto) {
1545       nextToken();
1546       return;
1547     }
1548     if (Style.isVerilog()) {
1549       parseBlock();
1550       addUnwrappedLine();
1551       return;
1552     }
1553     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1554       // 'case: string' field declaration.
1555       nextToken();
1556       break;
1557     }
1558     parseCaseLabel();
1559     return;
1560   case tok::kw_try:
1561   case tok::kw___try:
1562     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1563       // field/method declaration.
1564       break;
1565     }
1566     parseTryCatch();
1567     return;
1568   case tok::kw_extern:
1569     nextToken();
1570     if (Style.isVerilog()) {
1571       // In Verilog and extern module declaration looks like a start of module.
1572       // But there is no body and endmodule. So we handle it separately.
1573       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1574         parseVerilogHierarchyHeader();
1575         return;
1576       }
1577     } else if (FormatTok->is(tok::string_literal)) {
1578       nextToken();
1579       if (FormatTok->is(tok::l_brace)) {
1580         if (Style.BraceWrapping.AfterExternBlock)
1581           addUnwrappedLine();
1582         // Either we indent or for backwards compatibility we follow the
1583         // AfterExternBlock style.
1584         unsigned AddLevels =
1585             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1586                     (Style.BraceWrapping.AfterExternBlock &&
1587                      Style.IndentExternBlock ==
1588                          FormatStyle::IEBS_AfterExternBlock)
1589                 ? 1u
1590                 : 0u;
1591         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1592         addUnwrappedLine();
1593         return;
1594       }
1595     }
1596     break;
1597   case tok::kw_export:
1598     if (Style.isJavaScript()) {
1599       parseJavaScriptEs6ImportExport();
1600       return;
1601     }
1602     if (Style.isCpp()) {
1603       nextToken();
1604       if (FormatTok->is(tok::kw_namespace)) {
1605         parseNamespace();
1606         return;
1607       }
1608       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1609         return;
1610     }
1611     break;
1612   case tok::kw_inline:
1613     nextToken();
1614     if (FormatTok->is(tok::kw_namespace)) {
1615       parseNamespace();
1616       return;
1617     }
1618     break;
1619   case tok::identifier:
1620     if (FormatTok->is(TT_ForEachMacro)) {
1621       parseForOrWhileLoop();
1622       return;
1623     }
1624     if (FormatTok->is(TT_MacroBlockBegin)) {
1625       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1626                  /*MunchSemi=*/false);
1627       return;
1628     }
1629     if (FormatTok->is(Keywords.kw_import)) {
1630       if (Style.isJavaScript()) {
1631         parseJavaScriptEs6ImportExport();
1632         return;
1633       }
1634       if (Style.Language == FormatStyle::LK_Proto) {
1635         nextToken();
1636         if (FormatTok->is(tok::kw_public))
1637           nextToken();
1638         if (FormatTok->isNot(tok::string_literal))
1639           return;
1640         nextToken();
1641         if (FormatTok->is(tok::semi))
1642           nextToken();
1643         addUnwrappedLine();
1644         return;
1645       }
1646       if (Style.isCpp() && parseModuleImport())
1647         return;
1648     }
1649     if (Style.isCpp() &&
1650         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1651                            Keywords.kw_slots, Keywords.kw_qslots)) {
1652       nextToken();
1653       if (FormatTok->is(tok::colon)) {
1654         nextToken();
1655         addUnwrappedLine();
1656         return;
1657       }
1658     }
1659     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1660       parseStatementMacro();
1661       return;
1662     }
1663     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1664       parseNamespace();
1665       return;
1666     }
1667     // In Verilog labels can be any expression, so we don't do them here.
1668     // JS doesn't have macros, and within classes colons indicate fields, not
1669     // labels.
1670     // TableGen doesn't have labels.
1671     if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1672         Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1673       nextToken();
1674       Line->Tokens.begin()->Tok->MustBreakBefore = true;
1675       FormatTok->setFinalizedType(TT_GotoLabelColon);
1676       parseLabel(!Style.IndentGotoLabels);
1677       if (HasLabel)
1678         *HasLabel = true;
1679       return;
1680     }
1681     // In all other cases, parse the declaration.
1682     break;
1683   default:
1684     break;
1685   }
1686 
1687   const bool InRequiresExpression =
1688       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1689   do {
1690     const FormatToken *Previous = FormatTok->Previous;
1691     switch (FormatTok->Tok.getKind()) {
1692     case tok::at:
1693       nextToken();
1694       if (FormatTok->is(tok::l_brace)) {
1695         nextToken();
1696         parseBracedList();
1697         break;
1698       } else if (Style.Language == FormatStyle::LK_Java &&
1699                  FormatTok->is(Keywords.kw_interface)) {
1700         nextToken();
1701         break;
1702       }
1703       switch (FormatTok->Tok.getObjCKeywordID()) {
1704       case tok::objc_public:
1705       case tok::objc_protected:
1706       case tok::objc_package:
1707       case tok::objc_private:
1708         return parseAccessSpecifier();
1709       case tok::objc_interface:
1710       case tok::objc_implementation:
1711         return parseObjCInterfaceOrImplementation();
1712       case tok::objc_protocol:
1713         if (parseObjCProtocol())
1714           return;
1715         break;
1716       case tok::objc_end:
1717         return; // Handled by the caller.
1718       case tok::objc_optional:
1719       case tok::objc_required:
1720         nextToken();
1721         addUnwrappedLine();
1722         return;
1723       case tok::objc_autoreleasepool:
1724         nextToken();
1725         if (FormatTok->is(tok::l_brace)) {
1726           if (Style.BraceWrapping.AfterControlStatement ==
1727               FormatStyle::BWACS_Always) {
1728             addUnwrappedLine();
1729           }
1730           parseBlock();
1731         }
1732         addUnwrappedLine();
1733         return;
1734       case tok::objc_synchronized:
1735         nextToken();
1736         if (FormatTok->is(tok::l_paren)) {
1737           // Skip synchronization object
1738           parseParens();
1739         }
1740         if (FormatTok->is(tok::l_brace)) {
1741           if (Style.BraceWrapping.AfterControlStatement ==
1742               FormatStyle::BWACS_Always) {
1743             addUnwrappedLine();
1744           }
1745           parseBlock();
1746         }
1747         addUnwrappedLine();
1748         return;
1749       case tok::objc_try:
1750         // This branch isn't strictly necessary (the kw_try case below would
1751         // do this too after the tok::at is parsed above).  But be explicit.
1752         parseTryCatch();
1753         return;
1754       default:
1755         break;
1756       }
1757       break;
1758     case tok::kw_requires: {
1759       if (Style.isCpp()) {
1760         bool ParsedClause = parseRequires();
1761         if (ParsedClause)
1762           return;
1763       } else {
1764         nextToken();
1765       }
1766       break;
1767     }
1768     case tok::kw_enum:
1769       // Ignore if this is part of "template <enum ...".
1770       if (Previous && Previous->is(tok::less)) {
1771         nextToken();
1772         break;
1773       }
1774 
1775       // parseEnum falls through and does not yet add an unwrapped line as an
1776       // enum definition can start a structural element.
1777       if (!parseEnum())
1778         break;
1779       // This only applies to C++ and Verilog.
1780       if (!Style.isCpp() && !Style.isVerilog()) {
1781         addUnwrappedLine();
1782         return;
1783       }
1784       break;
1785     case tok::kw_typedef:
1786       nextToken();
1787       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1788                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1789                              Keywords.kw_CF_CLOSED_ENUM,
1790                              Keywords.kw_NS_CLOSED_ENUM)) {
1791         parseEnum();
1792       }
1793       break;
1794     case tok::kw_class:
1795       if (Style.isVerilog()) {
1796         parseBlock();
1797         addUnwrappedLine();
1798         return;
1799       }
1800       if (Style.isTableGen()) {
1801         // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1802         // This is same as def and so on.
1803         nextToken();
1804         break;
1805       }
1806       [[fallthrough]];
1807     case tok::kw_struct:
1808     case tok::kw_union:
1809       if (parseStructLike())
1810         return;
1811       break;
1812     case tok::kw_decltype:
1813       nextToken();
1814       if (FormatTok->is(tok::l_paren)) {
1815         parseParens();
1816         assert(FormatTok->Previous);
1817         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1818                                               tok::l_paren)) {
1819           Line->SeenDecltypeAuto = true;
1820         }
1821       }
1822       break;
1823     case tok::period:
1824       nextToken();
1825       // In Java, classes have an implicit static member "class".
1826       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1827           FormatTok->is(tok::kw_class)) {
1828         nextToken();
1829       }
1830       if (Style.isJavaScript() && FormatTok &&
1831           FormatTok->Tok.getIdentifierInfo()) {
1832         // JavaScript only has pseudo keywords, all keywords are allowed to
1833         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1834         nextToken();
1835       }
1836       break;
1837     case tok::semi:
1838       nextToken();
1839       addUnwrappedLine();
1840       return;
1841     case tok::r_brace:
1842       addUnwrappedLine();
1843       return;
1844     case tok::l_paren: {
1845       parseParens();
1846       // Break the unwrapped line if a K&R C function definition has a parameter
1847       // declaration.
1848       if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1849         break;
1850       if (isC78ParameterDecl(FormatTok,
1851                              Tokens->peekNextToken(/*SkipComment=*/true),
1852                              Previous)) {
1853         addUnwrappedLine();
1854         return;
1855       }
1856       break;
1857     }
1858     case tok::kw_operator:
1859       nextToken();
1860       if (FormatTok->isBinaryOperator())
1861         nextToken();
1862       break;
1863     case tok::caret:
1864       nextToken();
1865       // Block return type.
1866       if (FormatTok->Tok.isAnyIdentifier() ||
1867           FormatTok->isSimpleTypeSpecifier()) {
1868         nextToken();
1869         // Return types: pointers are ok too.
1870         while (FormatTok->is(tok::star))
1871           nextToken();
1872       }
1873       // Block argument list.
1874       if (FormatTok->is(tok::l_paren))
1875         parseParens();
1876       // Block body.
1877       if (FormatTok->is(tok::l_brace))
1878         parseChildBlock();
1879       break;
1880     case tok::l_brace:
1881       if (InRequiresExpression)
1882         FormatTok->setFinalizedType(TT_BracedListLBrace);
1883       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1884         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1885         // A block outside of parentheses must be the last part of a
1886         // structural element.
1887         // FIXME: Figure out cases where this is not true, and add projections
1888         // for them (the one we know is missing are lambdas).
1889         if (Style.Language == FormatStyle::LK_Java &&
1890             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1891           // If necessary, we could set the type to something different than
1892           // TT_FunctionLBrace.
1893           if (Style.BraceWrapping.AfterControlStatement ==
1894               FormatStyle::BWACS_Always) {
1895             addUnwrappedLine();
1896           }
1897         } else if (Style.BraceWrapping.AfterFunction) {
1898           addUnwrappedLine();
1899         }
1900         FormatTok->setFinalizedType(TT_FunctionLBrace);
1901         parseBlock();
1902         IsDecltypeAutoFunction = false;
1903         addUnwrappedLine();
1904         return;
1905       }
1906       // Otherwise this was a braced init list, and the structural
1907       // element continues.
1908       break;
1909     case tok::kw_try:
1910       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1911         // field/method declaration.
1912         nextToken();
1913         break;
1914       }
1915       // We arrive here when parsing function-try blocks.
1916       if (Style.BraceWrapping.AfterFunction)
1917         addUnwrappedLine();
1918       parseTryCatch();
1919       return;
1920     case tok::identifier: {
1921       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1922           Line->MustBeDeclaration) {
1923         addUnwrappedLine();
1924         parseCSharpGenericTypeConstraint();
1925         break;
1926       }
1927       if (FormatTok->is(TT_MacroBlockEnd)) {
1928         addUnwrappedLine();
1929         return;
1930       }
1931 
1932       // Function declarations (as opposed to function expressions) are parsed
1933       // on their own unwrapped line by continuing this loop. Function
1934       // expressions (functions that are not on their own line) must not create
1935       // a new unwrapped line, so they are special cased below.
1936       size_t TokenCount = Line->Tokens.size();
1937       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1938           (TokenCount > 1 ||
1939            (TokenCount == 1 &&
1940             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1941         tryToParseJSFunction();
1942         break;
1943       }
1944       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1945           FormatTok->is(Keywords.kw_interface)) {
1946         if (Style.isJavaScript()) {
1947           // In JavaScript/TypeScript, "interface" can be used as a standalone
1948           // identifier, e.g. in `var interface = 1;`. If "interface" is
1949           // followed by another identifier, it is very like to be an actual
1950           // interface declaration.
1951           unsigned StoredPosition = Tokens->getPosition();
1952           FormatToken *Next = Tokens->getNextToken();
1953           FormatTok = Tokens->setPosition(StoredPosition);
1954           if (!mustBeJSIdent(Keywords, Next)) {
1955             nextToken();
1956             break;
1957           }
1958         }
1959         parseRecord();
1960         addUnwrappedLine();
1961         return;
1962       }
1963 
1964       if (Style.isVerilog()) {
1965         if (FormatTok->is(Keywords.kw_table)) {
1966           parseVerilogTable();
1967           return;
1968         }
1969         if (Keywords.isVerilogBegin(*FormatTok) ||
1970             Keywords.isVerilogHierarchy(*FormatTok)) {
1971           parseBlock();
1972           addUnwrappedLine();
1973           return;
1974         }
1975       }
1976 
1977       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1978         if (parseStructLike())
1979           return;
1980         break;
1981       }
1982 
1983       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1984         parseStatementMacro();
1985         return;
1986       }
1987 
1988       // See if the following token should start a new unwrapped line.
1989       StringRef Text = FormatTok->TokenText;
1990 
1991       FormatToken *PreviousToken = FormatTok;
1992       nextToken();
1993 
1994       // JS doesn't have macros, and within classes colons indicate fields, not
1995       // labels.
1996       if (Style.isJavaScript())
1997         break;
1998 
1999       auto OneTokenSoFar = [&]() {
2000         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2001         while (I != E && I->Tok->is(tok::comment))
2002           ++I;
2003         if (Style.isVerilog())
2004           while (I != E && I->Tok->is(tok::hash))
2005             ++I;
2006         return I != E && (++I == E);
2007       };
2008       if (OneTokenSoFar()) {
2009         // Recognize function-like macro usages without trailing semicolon as
2010         // well as free-standing macros like Q_OBJECT.
2011         bool FunctionLike = FormatTok->is(tok::l_paren);
2012         if (FunctionLike)
2013           parseParens();
2014 
2015         bool FollowedByNewline =
2016             CommentsBeforeNextToken.empty()
2017                 ? FormatTok->NewlinesBefore > 0
2018                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2019 
2020         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2021             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2022           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2023             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2024           addUnwrappedLine();
2025           return;
2026         }
2027       }
2028       break;
2029     }
2030     case tok::equal:
2031       if ((Style.isJavaScript() || Style.isCSharp()) &&
2032           FormatTok->is(TT_FatArrow)) {
2033         tryToParseChildBlock();
2034         break;
2035       }
2036 
2037       nextToken();
2038       if (FormatTok->is(tok::l_brace)) {
2039         // Block kind should probably be set to BK_BracedInit for any language.
2040         // C# needs this change to ensure that array initialisers and object
2041         // initialisers are indented the same way.
2042         if (Style.isCSharp())
2043           FormatTok->setBlockKind(BK_BracedInit);
2044         // TableGen's defset statement has syntax of the form,
2045         // `defset <type> <name> = { <statement>... }`
2046         if (Style.isTableGen() &&
2047             Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2048           FormatTok->setFinalizedType(TT_FunctionLBrace);
2049           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2050                      /*MunchSemi=*/false);
2051           addUnwrappedLine();
2052           break;
2053         }
2054         nextToken();
2055         parseBracedList();
2056       } else if (Style.Language == FormatStyle::LK_Proto &&
2057                  FormatTok->is(tok::less)) {
2058         nextToken();
2059         parseBracedList(/*IsAngleBracket=*/true);
2060       }
2061       break;
2062     case tok::l_square:
2063       parseSquare();
2064       break;
2065     case tok::kw_new:
2066       parseNew();
2067       break;
2068     case tok::kw_case:
2069       // Proto: there are no switch/case statements.
2070       if (Style.Language == FormatStyle::LK_Proto) {
2071         nextToken();
2072         return;
2073       }
2074       // In Verilog switch is called case.
2075       if (Style.isVerilog()) {
2076         parseBlock();
2077         addUnwrappedLine();
2078         return;
2079       }
2080       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2081         // 'case: string' field declaration.
2082         nextToken();
2083         break;
2084       }
2085       parseCaseLabel();
2086       break;
2087     case tok::kw_default:
2088       nextToken();
2089       if (Style.isVerilog()) {
2090         if (FormatTok->is(tok::colon)) {
2091           // The label will be handled in the next iteration.
2092           break;
2093         }
2094         if (FormatTok->is(Keywords.kw_clocking)) {
2095           // A default clocking block.
2096           parseBlock();
2097           addUnwrappedLine();
2098           return;
2099         }
2100         parseVerilogCaseLabel();
2101         return;
2102       }
2103       break;
2104     case tok::colon:
2105       nextToken();
2106       if (Style.isVerilog()) {
2107         parseVerilogCaseLabel();
2108         return;
2109       }
2110       break;
2111     default:
2112       nextToken();
2113       break;
2114     }
2115   } while (!eof());
2116 }
2117 
tryToParsePropertyAccessor()2118 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2119   assert(FormatTok->is(tok::l_brace));
2120   if (!Style.isCSharp())
2121     return false;
2122   // See if it's a property accessor.
2123   if (FormatTok->Previous->isNot(tok::identifier))
2124     return false;
2125 
2126   // See if we are inside a property accessor.
2127   //
2128   // Record the current tokenPosition so that we can advance and
2129   // reset the current token. `Next` is not set yet so we need
2130   // another way to advance along the token stream.
2131   unsigned int StoredPosition = Tokens->getPosition();
2132   FormatToken *Tok = Tokens->getNextToken();
2133 
2134   // A trivial property accessor is of the form:
2135   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2136   // Track these as they do not require line breaks to be introduced.
2137   bool HasSpecialAccessor = false;
2138   bool IsTrivialPropertyAccessor = true;
2139   while (!eof()) {
2140     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2141                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2142                      Keywords.kw_init, Keywords.kw_set)) {
2143       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2144         HasSpecialAccessor = true;
2145       Tok = Tokens->getNextToken();
2146       continue;
2147     }
2148     if (Tok->isNot(tok::r_brace))
2149       IsTrivialPropertyAccessor = false;
2150     break;
2151   }
2152 
2153   if (!HasSpecialAccessor) {
2154     Tokens->setPosition(StoredPosition);
2155     return false;
2156   }
2157 
2158   // Try to parse the property accessor:
2159   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2160   Tokens->setPosition(StoredPosition);
2161   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2162     addUnwrappedLine();
2163   nextToken();
2164   do {
2165     switch (FormatTok->Tok.getKind()) {
2166     case tok::r_brace:
2167       nextToken();
2168       if (FormatTok->is(tok::equal)) {
2169         while (!eof() && FormatTok->isNot(tok::semi))
2170           nextToken();
2171         nextToken();
2172       }
2173       addUnwrappedLine();
2174       return true;
2175     case tok::l_brace:
2176       ++Line->Level;
2177       parseBlock(/*MustBeDeclaration=*/true);
2178       addUnwrappedLine();
2179       --Line->Level;
2180       break;
2181     case tok::equal:
2182       if (FormatTok->is(TT_FatArrow)) {
2183         ++Line->Level;
2184         do {
2185           nextToken();
2186         } while (!eof() && FormatTok->isNot(tok::semi));
2187         nextToken();
2188         addUnwrappedLine();
2189         --Line->Level;
2190         break;
2191       }
2192       nextToken();
2193       break;
2194     default:
2195       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2196                              Keywords.kw_set) &&
2197           !IsTrivialPropertyAccessor) {
2198         // Non-trivial get/set needs to be on its own line.
2199         addUnwrappedLine();
2200       }
2201       nextToken();
2202     }
2203   } while (!eof());
2204 
2205   // Unreachable for well-formed code (paired '{' and '}').
2206   return true;
2207 }
2208 
tryToParseLambda()2209 bool UnwrappedLineParser::tryToParseLambda() {
2210   assert(FormatTok->is(tok::l_square));
2211   if (!Style.isCpp()) {
2212     nextToken();
2213     return false;
2214   }
2215   FormatToken &LSquare = *FormatTok;
2216   if (!tryToParseLambdaIntroducer())
2217     return false;
2218 
2219   bool SeenArrow = false;
2220   bool InTemplateParameterList = false;
2221 
2222   while (FormatTok->isNot(tok::l_brace)) {
2223     if (FormatTok->isSimpleTypeSpecifier()) {
2224       nextToken();
2225       continue;
2226     }
2227     switch (FormatTok->Tok.getKind()) {
2228     case tok::l_brace:
2229       break;
2230     case tok::l_paren:
2231       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2232       break;
2233     case tok::l_square:
2234       parseSquare();
2235       break;
2236     case tok::less:
2237       assert(FormatTok->Previous);
2238       if (FormatTok->Previous->is(tok::r_square))
2239         InTemplateParameterList = true;
2240       nextToken();
2241       break;
2242     case tok::kw_auto:
2243     case tok::kw_class:
2244     case tok::kw_template:
2245     case tok::kw_typename:
2246     case tok::amp:
2247     case tok::star:
2248     case tok::kw_const:
2249     case tok::kw_constexpr:
2250     case tok::kw_consteval:
2251     case tok::comma:
2252     case tok::greater:
2253     case tok::identifier:
2254     case tok::numeric_constant:
2255     case tok::coloncolon:
2256     case tok::kw_mutable:
2257     case tok::kw_noexcept:
2258     case tok::kw_static:
2259       nextToken();
2260       break;
2261     // Specialization of a template with an integer parameter can contain
2262     // arithmetic, logical, comparison and ternary operators.
2263     //
2264     // FIXME: This also accepts sequences of operators that are not in the scope
2265     // of a template argument list.
2266     //
2267     // In a C++ lambda a template type can only occur after an arrow. We use
2268     // this as an heuristic to distinguish between Objective-C expressions
2269     // followed by an `a->b` expression, such as:
2270     // ([obj func:arg] + a->b)
2271     // Otherwise the code below would parse as a lambda.
2272     case tok::plus:
2273     case tok::minus:
2274     case tok::exclaim:
2275     case tok::tilde:
2276     case tok::slash:
2277     case tok::percent:
2278     case tok::lessless:
2279     case tok::pipe:
2280     case tok::pipepipe:
2281     case tok::ampamp:
2282     case tok::caret:
2283     case tok::equalequal:
2284     case tok::exclaimequal:
2285     case tok::greaterequal:
2286     case tok::lessequal:
2287     case tok::question:
2288     case tok::colon:
2289     case tok::ellipsis:
2290     case tok::kw_true:
2291     case tok::kw_false:
2292       if (SeenArrow || InTemplateParameterList) {
2293         nextToken();
2294         break;
2295       }
2296       return true;
2297     case tok::arrow:
2298       // This might or might not actually be a lambda arrow (this could be an
2299       // ObjC method invocation followed by a dereferencing arrow). We might
2300       // reset this back to TT_Unknown in TokenAnnotator.
2301       FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2302       SeenArrow = true;
2303       nextToken();
2304       break;
2305     case tok::kw_requires: {
2306       auto *RequiresToken = FormatTok;
2307       nextToken();
2308       parseRequiresClause(RequiresToken);
2309       break;
2310     }
2311     case tok::equal:
2312       if (!InTemplateParameterList)
2313         return true;
2314       nextToken();
2315       break;
2316     default:
2317       return true;
2318     }
2319   }
2320 
2321   FormatTok->setFinalizedType(TT_LambdaLBrace);
2322   LSquare.setFinalizedType(TT_LambdaLSquare);
2323 
2324   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2325   parseChildBlock();
2326   assert(!NestedLambdas.empty());
2327   NestedLambdas.pop_back();
2328 
2329   return true;
2330 }
2331 
tryToParseLambdaIntroducer()2332 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2333   const FormatToken *Previous = FormatTok->Previous;
2334   const FormatToken *LeftSquare = FormatTok;
2335   nextToken();
2336   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2337                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2338                                         tok::kw_co_yield, tok::kw_co_return)) ||
2339                     Previous->closesScope())) ||
2340       LeftSquare->isCppStructuredBinding(Style)) {
2341     return false;
2342   }
2343   if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2344     return false;
2345   if (FormatTok->is(tok::r_square)) {
2346     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2347     if (Next->is(tok::greater))
2348       return false;
2349   }
2350   parseSquare(/*LambdaIntroducer=*/true);
2351   return true;
2352 }
2353 
tryToParseJSFunction()2354 void UnwrappedLineParser::tryToParseJSFunction() {
2355   assert(FormatTok->is(Keywords.kw_function));
2356   if (FormatTok->is(Keywords.kw_async))
2357     nextToken();
2358   // Consume "function".
2359   nextToken();
2360 
2361   // Consume * (generator function). Treat it like C++'s overloaded operators.
2362   if (FormatTok->is(tok::star)) {
2363     FormatTok->setFinalizedType(TT_OverloadedOperator);
2364     nextToken();
2365   }
2366 
2367   // Consume function name.
2368   if (FormatTok->is(tok::identifier))
2369     nextToken();
2370 
2371   if (FormatTok->isNot(tok::l_paren))
2372     return;
2373 
2374   // Parse formal parameter list.
2375   parseParens();
2376 
2377   if (FormatTok->is(tok::colon)) {
2378     // Parse a type definition.
2379     nextToken();
2380 
2381     // Eat the type declaration. For braced inline object types, balance braces,
2382     // otherwise just parse until finding an l_brace for the function body.
2383     if (FormatTok->is(tok::l_brace))
2384       tryToParseBracedList();
2385     else
2386       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2387         nextToken();
2388   }
2389 
2390   if (FormatTok->is(tok::semi))
2391     return;
2392 
2393   parseChildBlock();
2394 }
2395 
tryToParseBracedList()2396 bool UnwrappedLineParser::tryToParseBracedList() {
2397   if (FormatTok->is(BK_Unknown))
2398     calculateBraceTypes();
2399   assert(FormatTok->isNot(BK_Unknown));
2400   if (FormatTok->is(BK_Block))
2401     return false;
2402   nextToken();
2403   parseBracedList();
2404   return true;
2405 }
2406 
tryToParseChildBlock()2407 bool UnwrappedLineParser::tryToParseChildBlock() {
2408   assert(Style.isJavaScript() || Style.isCSharp());
2409   assert(FormatTok->is(TT_FatArrow));
2410   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2411   // They always start an expression or a child block if followed by a curly
2412   // brace.
2413   nextToken();
2414   if (FormatTok->isNot(tok::l_brace))
2415     return false;
2416   parseChildBlock();
2417   return true;
2418 }
2419 
parseBracedList(bool IsAngleBracket,bool IsEnum)2420 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2421   bool HasError = false;
2422 
2423   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2424   // replace this by using parseAssignmentExpression() inside.
2425   do {
2426     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2427         tryToParseChildBlock()) {
2428       continue;
2429     }
2430     if (Style.isJavaScript()) {
2431       if (FormatTok->is(Keywords.kw_function)) {
2432         tryToParseJSFunction();
2433         continue;
2434       }
2435       if (FormatTok->is(tok::l_brace)) {
2436         // Could be a method inside of a braced list `{a() { return 1; }}`.
2437         if (tryToParseBracedList())
2438           continue;
2439         parseChildBlock();
2440       }
2441     }
2442     if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2443       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2444         addUnwrappedLine();
2445       nextToken();
2446       return !HasError;
2447     }
2448     switch (FormatTok->Tok.getKind()) {
2449     case tok::l_square:
2450       if (Style.isCSharp())
2451         parseSquare();
2452       else
2453         tryToParseLambda();
2454       break;
2455     case tok::l_paren:
2456       parseParens();
2457       // JavaScript can just have free standing methods and getters/setters in
2458       // object literals. Detect them by a "{" following ")".
2459       if (Style.isJavaScript()) {
2460         if (FormatTok->is(tok::l_brace))
2461           parseChildBlock();
2462         break;
2463       }
2464       break;
2465     case tok::l_brace:
2466       // Assume there are no blocks inside a braced init list apart
2467       // from the ones we explicitly parse out (like lambdas).
2468       FormatTok->setBlockKind(BK_BracedInit);
2469       nextToken();
2470       parseBracedList();
2471       break;
2472     case tok::less:
2473       nextToken();
2474       if (IsAngleBracket)
2475         parseBracedList(/*IsAngleBracket=*/true);
2476       break;
2477     case tok::semi:
2478       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2479       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2480       // used for error recovery if we have otherwise determined that this is
2481       // a braced list.
2482       if (Style.isJavaScript()) {
2483         nextToken();
2484         break;
2485       }
2486       HasError = true;
2487       if (!IsEnum)
2488         return false;
2489       nextToken();
2490       break;
2491     case tok::comma:
2492       nextToken();
2493       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2494         addUnwrappedLine();
2495       break;
2496     default:
2497       nextToken();
2498       break;
2499     }
2500   } while (!eof());
2501   return false;
2502 }
2503 
/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different from TT_Unknown, sets this type for all
/// double ampersands. This applies to all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
parseParens(TokenType AmpAmpTokenType)2509 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2510   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2511   auto *LeftParen = FormatTok;
2512   bool SeenEqual = false;
2513   bool MightBeFoldExpr = false;
2514   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2515   nextToken();
2516   do {
2517     switch (FormatTok->Tok.getKind()) {
2518     case tok::l_paren:
2519       if (parseParens(AmpAmpTokenType))
2520         SeenEqual = true;
2521       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2522         parseChildBlock();
2523       break;
2524     case tok::r_paren:
2525       if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2526           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2527         const auto *Prev = LeftParen->Previous;
2528         const auto *Next = Tokens->peekNextToken();
2529         const bool DoubleParens =
2530             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2531         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2532         const bool Blacklisted =
2533             PrevPrev &&
2534             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2535              (SeenEqual &&
2536               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2537                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2538         const bool ReturnParens =
2539             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2540             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2541              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2542             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2543             Next->is(tok::semi);
2544         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2545           LeftParen->Optional = true;
2546           FormatTok->Optional = true;
2547         }
2548       }
2549       nextToken();
2550       return SeenEqual;
2551     case tok::r_brace:
2552       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2553       return SeenEqual;
2554     case tok::l_square:
2555       tryToParseLambda();
2556       break;
2557     case tok::l_brace:
2558       if (!tryToParseBracedList())
2559         parseChildBlock();
2560       break;
2561     case tok::at:
2562       nextToken();
2563       if (FormatTok->is(tok::l_brace)) {
2564         nextToken();
2565         parseBracedList();
2566       }
2567       break;
2568     case tok::ellipsis:
2569       MightBeFoldExpr = true;
2570       nextToken();
2571       break;
2572     case tok::equal:
2573       SeenEqual = true;
2574       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2575         tryToParseChildBlock();
2576       else
2577         nextToken();
2578       break;
2579     case tok::kw_class:
2580       if (Style.isJavaScript())
2581         parseRecord(/*ParseAsExpr=*/true);
2582       else
2583         nextToken();
2584       break;
2585     case tok::identifier:
2586       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2587         tryToParseJSFunction();
2588       else
2589         nextToken();
2590       break;
2591     case tok::kw_requires: {
2592       auto RequiresToken = FormatTok;
2593       nextToken();
2594       parseRequiresExpression(RequiresToken);
2595       break;
2596     }
2597     case tok::ampamp:
2598       if (AmpAmpTokenType != TT_Unknown)
2599         FormatTok->setFinalizedType(AmpAmpTokenType);
2600       [[fallthrough]];
2601     default:
2602       nextToken();
2603       break;
2604     }
2605   } while (!eof());
2606   return SeenEqual;
2607 }
2608 
parseSquare(bool LambdaIntroducer)2609 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2610   if (!LambdaIntroducer) {
2611     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2612     if (tryToParseLambda())
2613       return;
2614   }
2615   do {
2616     switch (FormatTok->Tok.getKind()) {
2617     case tok::l_paren:
2618       parseParens();
2619       break;
2620     case tok::r_square:
2621       nextToken();
2622       return;
2623     case tok::r_brace:
2624       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2625       return;
2626     case tok::l_square:
2627       parseSquare();
2628       break;
2629     case tok::l_brace: {
2630       if (!tryToParseBracedList())
2631         parseChildBlock();
2632       break;
2633     }
2634     case tok::at:
2635       nextToken();
2636       if (FormatTok->is(tok::l_brace)) {
2637         nextToken();
2638         parseBracedList();
2639       }
2640       break;
2641     default:
2642       nextToken();
2643       break;
2644     }
2645   } while (!eof());
2646 }
2647 
keepAncestorBraces()2648 void UnwrappedLineParser::keepAncestorBraces() {
2649   if (!Style.RemoveBracesLLVM)
2650     return;
2651 
2652   const int MaxNestingLevels = 2;
2653   const int Size = NestedTooDeep.size();
2654   if (Size >= MaxNestingLevels)
2655     NestedTooDeep[Size - MaxNestingLevels] = true;
2656   NestedTooDeep.push_back(false);
2657 }
2658 
getLastNonComment(const UnwrappedLine & Line)2659 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2660   for (const auto &Token : llvm::reverse(Line.Tokens))
2661     if (Token.Tok->isNot(tok::comment))
2662       return Token.Tok;
2663 
2664   return nullptr;
2665 }
2666 
/// Parses the body of a control statement that is not enclosed in braces.
///
/// The line collected so far is finished, and the body is parsed as a single
/// structural element one level deeper. When Style.InsertBraces is enabled,
/// the tokens around the body are tagged through their BraceCount field.
///
/// \param CheckEOF If true and the end of the input has been reached after the
/// body, the current line is finished as well.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  // Brace insertion is only considered outside of preprocessor directives,
  // when the statement has tokens, and when the body is not a lone `;`.
  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
    // Pick the token preceding the body: the last non-comment token when
    // braces are never wrapped after control statements, otherwise the very
    // last token of the line.
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
              ? getLastNonComment(*Line)
              : Line->Tokens.back().Tok;
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // The token is already tagged with -1; leave it alone and skip the
      // matching update after the body.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      // NOTE(review): -1 presumably means "insert an opening brace after this
      // token" - confirm against the consumer of BraceCount.
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recently added line that is not part of a preprocessor
    // directive and contains a non-comment token; its last token (which may
    // itself be a comment) receives the increment.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    // NOTE(review): a positive count presumably is the number of closing
    // braces to insert after this token - confirm.
    ++Tok->BraceCount;
  }

  if (CheckEOF && eof())
    addUnwrappedLine();

  --Line->Level;
}
2706 
markOptionalBraces(FormatToken * LeftBrace)2707 static void markOptionalBraces(FormatToken *LeftBrace) {
2708   if (!LeftBrace)
2709     return;
2710 
2711   assert(LeftBrace->is(tok::l_brace));
2712 
2713   FormatToken *RightBrace = LeftBrace->MatchingParen;
2714   if (!RightBrace) {
2715     assert(!LeftBrace->Optional);
2716     return;
2717   }
2718 
2719   assert(RightBrace->is(tok::r_brace));
2720   assert(RightBrace->MatchingParen == LeftBrace);
2721   assert(LeftBrace->Optional == RightBrace->Optional);
2722 
2723   LeftBrace->Optional = true;
2724   RightBrace->Optional = true;
2725 }
2726 
handleAttributes()2727 void UnwrappedLineParser::handleAttributes() {
2728   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2729   if (FormatTok->isAttribute())
2730     nextToken();
2731   else if (FormatTok->is(tok::l_square))
2732     handleCppAttributes();
2733 }
2734 
handleCppAttributes()2735 bool UnwrappedLineParser::handleCppAttributes() {
2736   // Handle [[likely]] / [[unlikely]] attributes.
2737   assert(FormatTok->is(tok::l_square));
2738   if (!tryToParseSimpleAttribute())
2739     return false;
2740   parseSquare();
2741   return true;
2742 }
2743 
2744 /// Returns whether \c Tok begins a block.
isBlockBegin(const FormatToken & Tok) const2745 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2746   // FIXME: rename the function or make
2747   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2748   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2749                            : Tok.is(tok::l_brace);
2750 }
2751 
/// Parses an `if` statement including its condition, its body and any `else`
/// / `else if` parts. With \p IsVerilogAssert it parses a Verilog
/// assert/assume/cover/restrict statement instead.
///
/// When Style.RemoveBracesLLVM is enabled, the function also decides whether
/// the braces of the if and else bodies are optional and marks them
/// accordingly.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// the shape of the statement: IfOnly, IfElse or IfElseIf.
/// \param KeepBraces Forces the braces to be kept even if
/// Style.RemoveBracesLLVM is enabled.
/// \param IsVerilogAssert Whether a Verilog assertion-like statement is being
/// parsed.
/// \returns the left brace of the if-body when Style.RemoveBracesLLVM is
/// enabled and the body is a block; nullptr otherwise.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // `if consteval` always keeps its braces; for every other form the decision
  // depends on the style option and on the caller's request.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line after the if-block still has to be finished because no
  // `else` follows (and the style does not break before `else`).
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  // Parse the then-branch: a block, nothing at all (Verilog assert directly
  // followed by `else`), or a single unbraced statement.
  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the if-braces when the block has no matching right brace recorded,
    // when the nesting is too deep, or when the block itself consists of an
    // `if` without `else` or an `if`-`else if` chain.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else-branch with a clean "too deep" flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // Remember whether the else-block starts with `if`, i.e. `else { if`.
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        // Another `else` directly follows the else-block.
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }` where the inner braces are kept: the
        // else-braces themselves are marked optional right away.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if`: parse the chained `if` recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on its own, indented
      // line.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take our entry off the stack while the chained `if` is parsed and
        // restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else-branch: an if-else inside the if-block forces the braces to be
    // kept.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only concerns brace removal. Note that *IfKind is not
  // written on this early return.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Close the nesting level opened by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair so that a later call to
    // markOptionalBraces() on this left brace becomes a no-op.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2924 
parseTryCatch()2925 void UnwrappedLineParser::parseTryCatch() {
2926   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2927   nextToken();
2928   bool NeedsUnwrappedLine = false;
2929   if (FormatTok->is(tok::colon)) {
2930     // We are in a function try block, what comes is an initializer list.
2931     nextToken();
2932 
2933     // In case identifiers were removed by clang-tidy, what might follow is
2934     // multiple commas in sequence - before the first identifier.
2935     while (FormatTok->is(tok::comma))
2936       nextToken();
2937 
2938     while (FormatTok->is(tok::identifier)) {
2939       nextToken();
2940       if (FormatTok->is(tok::l_paren))
2941         parseParens();
2942       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2943           FormatTok->is(tok::l_brace)) {
2944         do {
2945           nextToken();
2946         } while (FormatTok->isNot(tok::r_brace));
2947         nextToken();
2948       }
2949 
2950       // In case identifiers were removed by clang-tidy, what might follow is
2951       // multiple commas in sequence - after the first identifier.
2952       while (FormatTok->is(tok::comma))
2953         nextToken();
2954     }
2955   }
2956   // Parse try with resource.
2957   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2958     parseParens();
2959 
2960   keepAncestorBraces();
2961 
2962   if (FormatTok->is(tok::l_brace)) {
2963     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2964     parseBlock();
2965     if (Style.BraceWrapping.BeforeCatch)
2966       addUnwrappedLine();
2967     else
2968       NeedsUnwrappedLine = true;
2969   } else if (FormatTok->isNot(tok::kw_catch)) {
2970     // The C++ standard requires a compound-statement after a try.
2971     // If there's none, we try to assume there's a structuralElement
2972     // and try to continue.
2973     addUnwrappedLine();
2974     ++Line->Level;
2975     parseStructuralElement();
2976     --Line->Level;
2977   }
2978   while (true) {
2979     if (FormatTok->is(tok::at))
2980       nextToken();
2981     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2982                              tok::kw___finally) ||
2983           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2984            FormatTok->is(Keywords.kw_finally)) ||
2985           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2986            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2987       break;
2988     }
2989     nextToken();
2990     while (FormatTok->isNot(tok::l_brace)) {
2991       if (FormatTok->is(tok::l_paren)) {
2992         parseParens();
2993         continue;
2994       }
2995       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2996         if (Style.RemoveBracesLLVM)
2997           NestedTooDeep.pop_back();
2998         return;
2999       }
3000       nextToken();
3001     }
3002     NeedsUnwrappedLine = false;
3003     Line->MustBeDeclaration = false;
3004     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3005     parseBlock();
3006     if (Style.BraceWrapping.BeforeCatch)
3007       addUnwrappedLine();
3008     else
3009       NeedsUnwrappedLine = true;
3010   }
3011 
3012   if (Style.RemoveBracesLLVM)
3013     NestedTooDeep.pop_back();
3014 
3015   if (NeedsUnwrappedLine)
3016     addUnwrappedLine();
3017 }
3018 
parseNamespace()3019 void UnwrappedLineParser::parseNamespace() {
3020   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3021          "'namespace' expected");
3022 
3023   const FormatToken &InitialToken = *FormatTok;
3024   nextToken();
3025   if (InitialToken.is(TT_NamespaceMacro)) {
3026     parseParens();
3027   } else {
3028     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3029                               tok::l_square, tok::period, tok::l_paren) ||
3030            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3031       if (FormatTok->is(tok::l_square))
3032         parseSquare();
3033       else if (FormatTok->is(tok::l_paren))
3034         parseParens();
3035       else
3036         nextToken();
3037     }
3038   }
3039   if (FormatTok->is(tok::l_brace)) {
3040     FormatTok->setFinalizedType(TT_NamespaceLBrace);
3041 
3042     if (ShouldBreakBeforeBrace(Style, InitialToken))
3043       addUnwrappedLine();
3044 
3045     unsigned AddLevels =
3046         Style.NamespaceIndentation == FormatStyle::NI_All ||
3047                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3048                  DeclarationScopeStack.size() > 1)
3049             ? 1u
3050             : 0u;
3051     bool ManageWhitesmithsBraces =
3052         AddLevels == 0u &&
3053         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3054 
3055     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3056     // the whole block.
3057     if (ManageWhitesmithsBraces)
3058       ++Line->Level;
3059 
3060     // Munch the semicolon after a namespace. This is more common than one would
3061     // think. Putting the semicolon into its own line is very ugly.
3062     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3063                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3064                ManageWhitesmithsBraces);
3065 
3066     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3067 
3068     if (ManageWhitesmithsBraces)
3069       --Line->Level;
3070   }
3071   // FIXME: Add error handling.
3072 }
3073 
parseNew()3074 void UnwrappedLineParser::parseNew() {
3075   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3076   nextToken();
3077 
3078   if (Style.isCSharp()) {
3079     do {
3080       // Handle constructor invocation, e.g. `new(field: value)`.
3081       if (FormatTok->is(tok::l_paren))
3082         parseParens();
3083 
3084       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3085       if (FormatTok->is(tok::l_brace))
3086         parseBracedList();
3087 
3088       if (FormatTok->isOneOf(tok::semi, tok::comma))
3089         return;
3090 
3091       nextToken();
3092     } while (!eof());
3093   }
3094 
3095   if (Style.Language != FormatStyle::LK_Java)
3096     return;
3097 
3098   // In Java, we can parse everything up to the parens, which aren't optional.
3099   do {
3100     // There should not be a ;, { or } before the new's open paren.
3101     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3102       return;
3103 
3104     // Consume the parens.
3105     if (FormatTok->is(tok::l_paren)) {
3106       parseParens();
3107 
3108       // If there is a class body of an anonymous class, consume that as child.
3109       if (FormatTok->is(tok::l_brace))
3110         parseChildBlock();
3111       return;
3112     }
3113     nextToken();
3114   } while (!eof());
3115 }
3116 
parseLoopBody(bool KeepBraces,bool WrapRightBrace)3117 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3118   keepAncestorBraces();
3119 
3120   if (isBlockBegin(*FormatTok)) {
3121     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3122     FormatToken *LeftBrace = FormatTok;
3123     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3124     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3125                /*MunchSemi=*/true, KeepBraces);
3126     setPreviousRBraceType(TT_ControlStatementRBrace);
3127     if (!KeepBraces) {
3128       assert(!NestedTooDeep.empty());
3129       if (!NestedTooDeep.back())
3130         markOptionalBraces(LeftBrace);
3131     }
3132     if (WrapRightBrace)
3133       addUnwrappedLine();
3134   } else {
3135     parseUnbracedBody();
3136   }
3137 
3138   if (!KeepBraces)
3139     NestedTooDeep.pop_back();
3140 }
3141 
parseForOrWhileLoop(bool HasParens)3142 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3143   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3144           (Style.isVerilog() &&
3145            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3146                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3147                               Keywords.kw_final, Keywords.kw_initial,
3148                               Keywords.kw_foreach, Keywords.kw_forever,
3149                               Keywords.kw_repeat))) &&
3150          "'for', 'while' or foreach macro expected");
3151   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3152                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3153 
3154   nextToken();
3155   // JS' for await ( ...
3156   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3157     nextToken();
3158   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3159     nextToken();
3160   if (HasParens && FormatTok->is(tok::l_paren)) {
3161     // The type is only set for Verilog basically because we were afraid to
3162     // change the existing behavior for loops. See the discussion on D121756 for
3163     // details.
3164     if (Style.isVerilog())
3165       FormatTok->setFinalizedType(TT_ConditionLParen);
3166     parseParens();
3167   }
3168 
3169   if (Style.isVerilog()) {
3170     // Event control.
3171     parseVerilogSensitivityList();
3172   } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3173              Tokens->getPreviousToken()->is(tok::r_paren)) {
3174     nextToken();
3175     addUnwrappedLine();
3176     return;
3177   }
3178 
3179   handleAttributes();
3180   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3181 }
3182 
parseDoWhile()3183 void UnwrappedLineParser::parseDoWhile() {
3184   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3185   nextToken();
3186 
3187   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3188 
3189   // FIXME: Add error handling.
3190   if (FormatTok->isNot(tok::kw_while)) {
3191     addUnwrappedLine();
3192     return;
3193   }
3194 
3195   FormatTok->setFinalizedType(TT_DoWhile);
3196 
3197   // If in Whitesmiths mode, the line with the while() needs to be indented
3198   // to the same level as the block.
3199   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3200     ++Line->Level;
3201 
3202   nextToken();
3203   parseStructuralElement();
3204 }
3205 
parseLabel(bool LeftAlignLabel)3206 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3207   nextToken();
3208   unsigned OldLineLevel = Line->Level;
3209   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3210     --Line->Level;
3211   if (LeftAlignLabel)
3212     Line->Level = 0;
3213 
3214   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3215       FormatTok->is(tok::l_brace)) {
3216 
3217     CompoundStatementIndenter Indenter(this, Line->Level,
3218                                        Style.BraceWrapping.AfterCaseLabel,
3219                                        Style.BraceWrapping.IndentBraces);
3220     parseBlock();
3221     if (FormatTok->is(tok::kw_break)) {
3222       if (Style.BraceWrapping.AfterControlStatement ==
3223           FormatStyle::BWACS_Always) {
3224         addUnwrappedLine();
3225         if (!Style.IndentCaseBlocks &&
3226             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3227           ++Line->Level;
3228         }
3229       }
3230       parseStructuralElement();
3231     }
3232     addUnwrappedLine();
3233   } else {
3234     if (FormatTok->is(tok::semi))
3235       nextToken();
3236     addUnwrappedLine();
3237   }
3238   Line->Level = OldLineLevel;
3239   if (FormatTok->isNot(tok::l_brace)) {
3240     parseStructuralElement();
3241     addUnwrappedLine();
3242   }
3243 }
3244 
parseCaseLabel()3245 void UnwrappedLineParser::parseCaseLabel() {
3246   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3247 
3248   // FIXME: fix handling of complex expressions here.
3249   do {
3250     nextToken();
3251     if (FormatTok->is(tok::colon)) {
3252       FormatTok->setFinalizedType(TT_CaseLabelColon);
3253       break;
3254     }
3255   } while (!eof());
3256   parseLabel();
3257 }
3258 
parseSwitch()3259 void UnwrappedLineParser::parseSwitch() {
3260   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3261   nextToken();
3262   if (FormatTok->is(tok::l_paren))
3263     parseParens();
3264 
3265   keepAncestorBraces();
3266 
3267   if (FormatTok->is(tok::l_brace)) {
3268     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3269     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3270     parseBlock();
3271     setPreviousRBraceType(TT_ControlStatementRBrace);
3272     addUnwrappedLine();
3273   } else {
3274     addUnwrappedLine();
3275     ++Line->Level;
3276     parseStructuralElement();
3277     --Line->Level;
3278   }
3279 
3280   if (Style.RemoveBracesLLVM)
3281     NestedTooDeep.pop_back();
3282 }
3283 
3284 // Operators that can follow a C variable.
isCOperatorFollowingVar(tok::TokenKind kind)3285 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3286   switch (kind) {
3287   case tok::ampamp:
3288   case tok::ampequal:
3289   case tok::arrow:
3290   case tok::caret:
3291   case tok::caretequal:
3292   case tok::comma:
3293   case tok::ellipsis:
3294   case tok::equal:
3295   case tok::equalequal:
3296   case tok::exclaim:
3297   case tok::exclaimequal:
3298   case tok::greater:
3299   case tok::greaterequal:
3300   case tok::greatergreater:
3301   case tok::greatergreaterequal:
3302   case tok::l_paren:
3303   case tok::l_square:
3304   case tok::less:
3305   case tok::lessequal:
3306   case tok::lessless:
3307   case tok::lesslessequal:
3308   case tok::minus:
3309   case tok::minusequal:
3310   case tok::minusminus:
3311   case tok::percent:
3312   case tok::percentequal:
3313   case tok::period:
3314   case tok::pipe:
3315   case tok::pipeequal:
3316   case tok::pipepipe:
3317   case tok::plus:
3318   case tok::plusequal:
3319   case tok::plusplus:
3320   case tok::question:
3321   case tok::r_brace:
3322   case tok::r_paren:
3323   case tok::r_square:
3324   case tok::semi:
3325   case tok::slash:
3326   case tok::slashequal:
3327   case tok::star:
3328   case tok::starequal:
3329     return true;
3330   default:
3331     return false;
3332   }
3333 }
3334 
parseAccessSpecifier()3335 void UnwrappedLineParser::parseAccessSpecifier() {
3336   FormatToken *AccessSpecifierCandidate = FormatTok;
3337   nextToken();
3338   // Understand Qt's slots.
3339   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3340     nextToken();
3341   // Otherwise, we don't know what it is, and we'd better keep the next token.
3342   if (FormatTok->is(tok::colon)) {
3343     nextToken();
3344     addUnwrappedLine();
3345   } else if (FormatTok->isNot(tok::coloncolon) &&
3346              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3347     // Not a variable name nor namespace name.
3348     addUnwrappedLine();
3349   } else if (AccessSpecifierCandidate) {
3350     // Consider the access specifier to be a C identifier.
3351     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3352   }
3353 }
3354 
3355 /// \brief Parses a requires, decides if it is a clause or an expression.
3356 /// \pre The current token has to be the requires keyword.
3357 /// \returns true if it parsed a clause.
parseRequires()3358 bool clang::format::UnwrappedLineParser::parseRequires() {
3359   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3360   auto RequiresToken = FormatTok;
3361 
3362   // We try to guess if it is a requires clause, or a requires expression. For
3363   // that we first consume the keyword and check the next token.
3364   nextToken();
3365 
3366   switch (FormatTok->Tok.getKind()) {
3367   case tok::l_brace:
3368     // This can only be an expression, never a clause.
3369     parseRequiresExpression(RequiresToken);
3370     return false;
3371   case tok::l_paren:
3372     // Clauses and expression can start with a paren, it's unclear what we have.
3373     break;
3374   default:
3375     // All other tokens can only be a clause.
3376     parseRequiresClause(RequiresToken);
3377     return true;
3378   }
3379 
3380   // Looking forward we would have to decide if there are function declaration
3381   // like arguments to the requires expression:
3382   // requires (T t) {
3383   // Or there is a constraint expression for the requires clause:
3384   // requires (C<T> && ...
3385 
3386   // But first let's look behind.
3387   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3388 
3389   if (!PreviousNonComment ||
3390       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3391     // If there is no token, or an expression left brace, we are a requires
3392     // clause within a requires expression.
3393     parseRequiresClause(RequiresToken);
3394     return true;
3395   }
3396 
3397   switch (PreviousNonComment->Tok.getKind()) {
3398   case tok::greater:
3399   case tok::r_paren:
3400   case tok::kw_noexcept:
3401   case tok::kw_const:
3402     // This is a requires clause.
3403     parseRequiresClause(RequiresToken);
3404     return true;
3405   case tok::amp:
3406   case tok::ampamp: {
3407     // This can be either:
3408     // if (... && requires (T t) ...)
3409     // Or
3410     // void member(...) && requires (C<T> ...
3411     // We check the one token before that for a const:
3412     // void member(...) const && requires (C<T> ...
3413     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3414     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3415       parseRequiresClause(RequiresToken);
3416       return true;
3417     }
3418     break;
3419   }
3420   default:
3421     if (PreviousNonComment->isTypeOrIdentifier()) {
3422       // This is a requires clause.
3423       parseRequiresClause(RequiresToken);
3424       return true;
3425     }
3426     // It's an expression.
3427     parseRequiresExpression(RequiresToken);
3428     return false;
3429   }
3430 
3431   // Now we look forward and try to check if the paren content is a parameter
3432   // list. The parameters can be cv-qualified and contain references or
3433   // pointers.
3434   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3435   // of stuff: typename, const, *, &, &&, ::, identifiers.
3436 
3437   unsigned StoredPosition = Tokens->getPosition();
3438   FormatToken *NextToken = Tokens->getNextToken();
3439   int Lookahead = 0;
3440   auto PeekNext = [&Lookahead, &NextToken, this] {
3441     ++Lookahead;
3442     NextToken = Tokens->getNextToken();
3443   };
3444 
3445   bool FoundType = false;
3446   bool LastWasColonColon = false;
3447   int OpenAngles = 0;
3448 
3449   for (; Lookahead < 50; PeekNext()) {
3450     switch (NextToken->Tok.getKind()) {
3451     case tok::kw_volatile:
3452     case tok::kw_const:
3453     case tok::comma:
3454       if (OpenAngles == 0) {
3455         FormatTok = Tokens->setPosition(StoredPosition);
3456         parseRequiresExpression(RequiresToken);
3457         return false;
3458       }
3459       break;
3460     case tok::r_paren:
3461     case tok::pipepipe:
3462       FormatTok = Tokens->setPosition(StoredPosition);
3463       parseRequiresClause(RequiresToken);
3464       return true;
3465     case tok::eof:
3466       // Break out of the loop.
3467       Lookahead = 50;
3468       break;
3469     case tok::coloncolon:
3470       LastWasColonColon = true;
3471       break;
3472     case tok::identifier:
3473       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3474         FormatTok = Tokens->setPosition(StoredPosition);
3475         parseRequiresExpression(RequiresToken);
3476         return false;
3477       }
3478       FoundType = true;
3479       LastWasColonColon = false;
3480       break;
3481     case tok::less:
3482       ++OpenAngles;
3483       break;
3484     case tok::greater:
3485       --OpenAngles;
3486       break;
3487     default:
3488       if (NextToken->isSimpleTypeSpecifier()) {
3489         FormatTok = Tokens->setPosition(StoredPosition);
3490         parseRequiresExpression(RequiresToken);
3491         return false;
3492       }
3493       break;
3494     }
3495   }
3496   // This seems to be a complicated expression, just assume it's a clause.
3497   FormatTok = Tokens->setPosition(StoredPosition);
3498   parseRequiresClause(RequiresToken);
3499   return true;
3500 }
3501 
3502 /// \brief Parses a requires clause.
3503 /// \param RequiresToken The requires keyword token, which starts this clause.
3504 /// \pre We need to be on the next token after the requires keyword.
3505 /// \sa parseRequiresExpression
3506 ///
3507 /// Returns if it either has finished parsing the clause, or it detects, that
3508 /// the clause is incorrect.
parseRequiresClause(FormatToken * RequiresToken)3509 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3510   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3511   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3512 
3513   // If there is no previous token, we are within a requires expression,
3514   // otherwise we will always have the template or function declaration in front
3515   // of it.
3516   bool InRequiresExpression =
3517       !RequiresToken->Previous ||
3518       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3519 
3520   RequiresToken->setFinalizedType(InRequiresExpression
3521                                       ? TT_RequiresClauseInARequiresExpression
3522                                       : TT_RequiresClause);
3523 
3524   // NOTE: parseConstraintExpression is only ever called from this function.
3525   // It could be inlined into here.
3526   parseConstraintExpression();
3527 
3528   if (!InRequiresExpression)
3529     FormatTok->Previous->ClosesRequiresClause = true;
3530 }
3531 
3532 /// \brief Parses a requires expression.
3533 /// \param RequiresToken The requires keyword token, which starts this clause.
3534 /// \pre We need to be on the next token after the requires keyword.
3535 /// \sa parseRequiresClause
3536 ///
3537 /// Returns if it either has finished parsing the expression, or it detects,
3538 /// that the expression is incorrect.
parseRequiresExpression(FormatToken * RequiresToken)3539 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3540   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3541   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3542 
3543   RequiresToken->setFinalizedType(TT_RequiresExpression);
3544 
3545   if (FormatTok->is(tok::l_paren)) {
3546     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3547     parseParens();
3548   }
3549 
3550   if (FormatTok->is(tok::l_brace)) {
3551     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3552     parseChildBlock();
3553   }
3554 }
3555 
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. The loop below consumes one
/// construct per iteration (nested requires expression, parenthesized group,
/// lambda, operator, literal, cast, or identifier with optional template
/// arguments) and returns as soon as it meets a token it does not treat as a
/// continuation of the constraint, when the expression is incorrect, or when
/// the end of the input is reached.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Read the permission for this iteration and reset it to false for the
    // next one; the cases below re-enable it where appropriate.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression inside the constraint.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      // Parentheses that are not allowed at this point do not belong to the
      // constraint; leave them for the caller.
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // Stop unless a lambda is allowed here and one is actually parsed.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // These tokens end the constraint expression.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Logical operators join constraints; another operand (possibly a lambda
      // or a parenthesized group) may follow.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry the lambda permission over unchanged to the next iteration.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its angle-bracketed type.
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // Any other predecessor means the identifier is not part of the
        // constraint.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3703 
parseEnum()3704 bool UnwrappedLineParser::parseEnum() {
3705   const FormatToken &InitialToken = *FormatTok;
3706 
3707   // Won't be 'enum' for NS_ENUMs.
3708   if (FormatTok->is(tok::kw_enum))
3709     nextToken();
3710 
3711   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3712   // declarations. An "enum" keyword followed by a colon would be a syntax
3713   // error and thus assume it is just an identifier.
3714   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3715     return false;
3716 
3717   // In protobuf, "enum" can be used as a field name.
3718   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3719     return false;
3720 
3721   // Eat up enum class ...
3722   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3723     nextToken();
3724 
3725   while (FormatTok->Tok.getIdentifierInfo() ||
3726          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3727                             tok::greater, tok::comma, tok::question,
3728                             tok::l_square, tok::r_square)) {
3729     if (Style.isVerilog()) {
3730       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3731       nextToken();
3732       // In Verilog the base type can have dimensions.
3733       while (FormatTok->is(tok::l_square))
3734         parseSquare();
3735     } else {
3736       nextToken();
3737     }
3738     // We can have macros or attributes in between 'enum' and the enum name.
3739     if (FormatTok->is(tok::l_paren))
3740       parseParens();
3741     assert(FormatTok->isNot(TT_AttributeSquare));
3742     if (FormatTok->is(tok::identifier)) {
3743       nextToken();
3744       // If there are two identifiers in a row, this is likely an elaborate
3745       // return type. In Java, this can be "implements", etc.
3746       if (Style.isCpp() && FormatTok->is(tok::identifier))
3747         return false;
3748     }
3749   }
3750 
3751   // Just a declaration or something is wrong.
3752   if (FormatTok->isNot(tok::l_brace))
3753     return true;
3754   FormatTok->setFinalizedType(TT_EnumLBrace);
3755   FormatTok->setBlockKind(BK_Block);
3756 
3757   if (Style.Language == FormatStyle::LK_Java) {
3758     // Java enums are different.
3759     parseJavaEnumBody();
3760     return true;
3761   }
3762   if (Style.Language == FormatStyle::LK_Proto) {
3763     parseBlock(/*MustBeDeclaration=*/true);
3764     return true;
3765   }
3766 
3767   if (!Style.AllowShortEnumsOnASingleLine &&
3768       ShouldBreakBeforeBrace(Style, InitialToken)) {
3769     addUnwrappedLine();
3770   }
3771   // Parse enum body.
3772   nextToken();
3773   if (!Style.AllowShortEnumsOnASingleLine) {
3774     addUnwrappedLine();
3775     Line->Level += 1;
3776   }
3777   bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3778   if (!Style.AllowShortEnumsOnASingleLine)
3779     Line->Level -= 1;
3780   if (HasError) {
3781     if (FormatTok->is(tok::semi))
3782       nextToken();
3783     addUnwrappedLine();
3784   }
3785   setPreviousRBraceType(TT_EnumRBrace);
3786   return true;
3787 
3788   // There is no addUnwrappedLine() here so that we fall through to parsing a
3789   // structural element afterwards. Thus, in "enum A {} n, m;",
3790   // "} n, m;" will end up in one unwrapped line.
3791 }
3792 
parseStructLike()3793 bool UnwrappedLineParser::parseStructLike() {
3794   // parseRecord falls through and does not yet add an unwrapped line as a
3795   // record declaration or definition can start a structural element.
3796   parseRecord();
3797   // This does not apply to Java, JavaScript and C#.
3798   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3799       Style.isCSharp()) {
3800     if (FormatTok->is(tok::semi))
3801       nextToken();
3802     addUnwrappedLine();
3803     return true;
3804   }
3805   return false;
3806 }
3807 
3808 namespace {
3809 // A class used to set and restore the Token position when peeking
3810 // ahead in the token source.
3811 class ScopedTokenPosition {
3812   unsigned StoredPosition;
3813   FormatTokenSource *Tokens;
3814 
3815 public:
ScopedTokenPosition(FormatTokenSource * Tokens)3816   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3817     assert(Tokens && "Tokens expected to not be null");
3818     StoredPosition = Tokens->getPosition();
3819   }
3820 
~ScopedTokenPosition()3821   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3822 };
3823 } // namespace
3824 
3825 // Look to see if we have [[ by looking ahead, if
3826 // its not then rewind to the original position.
tryToParseSimpleAttribute()3827 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3828   ScopedTokenPosition AutoPosition(Tokens);
3829   FormatToken *Tok = Tokens->getNextToken();
3830   // We already read the first [ check for the second.
3831   if (Tok->isNot(tok::l_square))
3832     return false;
3833   // Double check that the attribute is just something
3834   // fairly simple.
3835   while (Tok->isNot(tok::eof)) {
3836     if (Tok->is(tok::r_square))
3837       break;
3838     Tok = Tokens->getNextToken();
3839   }
3840   if (Tok->is(tok::eof))
3841     return false;
3842   Tok = Tokens->getNextToken();
3843   if (Tok->isNot(tok::r_square))
3844     return false;
3845   Tok = Tokens->getNextToken();
3846   if (Tok->is(tok::semi))
3847     return false;
3848   return true;
3849 }
3850 
parseJavaEnumBody()3851 void UnwrappedLineParser::parseJavaEnumBody() {
3852   assert(FormatTok->is(tok::l_brace));
3853   const FormatToken *OpeningBrace = FormatTok;
3854 
3855   // Determine whether the enum is simple, i.e. does not have a semicolon or
3856   // constants with class bodies. Simple enums can be formatted like braced
3857   // lists, contracted to a single line, etc.
3858   unsigned StoredPosition = Tokens->getPosition();
3859   bool IsSimple = true;
3860   FormatToken *Tok = Tokens->getNextToken();
3861   while (Tok->isNot(tok::eof)) {
3862     if (Tok->is(tok::r_brace))
3863       break;
3864     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3865       IsSimple = false;
3866       break;
3867     }
3868     // FIXME: This will also mark enums with braces in the arguments to enum
3869     // constants as "not simple". This is probably fine in practice, though.
3870     Tok = Tokens->getNextToken();
3871   }
3872   FormatTok = Tokens->setPosition(StoredPosition);
3873 
3874   if (IsSimple) {
3875     nextToken();
3876     parseBracedList();
3877     addUnwrappedLine();
3878     return;
3879   }
3880 
3881   // Parse the body of a more complex enum.
3882   // First add a line for everything up to the "{".
3883   nextToken();
3884   addUnwrappedLine();
3885   ++Line->Level;
3886 
3887   // Parse the enum constants.
3888   while (!eof()) {
3889     if (FormatTok->is(tok::l_brace)) {
3890       // Parse the constant's class body.
3891       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3892                  /*MunchSemi=*/false);
3893     } else if (FormatTok->is(tok::l_paren)) {
3894       parseParens();
3895     } else if (FormatTok->is(tok::comma)) {
3896       nextToken();
3897       addUnwrappedLine();
3898     } else if (FormatTok->is(tok::semi)) {
3899       nextToken();
3900       addUnwrappedLine();
3901       break;
3902     } else if (FormatTok->is(tok::r_brace)) {
3903       addUnwrappedLine();
3904       break;
3905     } else {
3906       nextToken();
3907     }
3908   }
3909 
3910   // Parse the class body after the enum's ";" if any.
3911   parseLevel(OpeningBrace);
3912   nextToken();
3913   --Line->Level;
3914   addUnwrappedLine();
3915 }
3916 
/// Parses a record (class, struct, union and similar) starting at its
/// introducing keyword: the name with any attributes or macros, an optional
/// part starting with ':' or '<', and the braced body if one is present.
///
/// \param ParseAsExpr If true the body is parsed with parseChildBlock();
/// otherwise it is parsed with parseBlock(), preceded by a new unwrapped line
/// when ShouldBreakBeforeBrace() asks for one.
///
/// No unwrapped line is added after the body, see the comment at the end.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // An identifier counts as a macro here when its text equals its own
  // upper-cased form.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // Attributes in square brackets are consumed as a unit.
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier(FormatTok->Previous) &&
        FormatTok->is(tok::l_paren)) {
      parseParens();
    }
  }

  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Scan what follows the name up to the body, tracking the nesting depth
    // of angle brackets along the way.
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(tok::greater))
        --AngleNestingLevel;

      // Outside of angle brackets, a '(' directly after a non-macro identifier
      // ends the scan.
      if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) &&
          IsNonMacroIdentifier(FormatTok->Previous)) {
        break;
      }
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends the scan; it is handled
        // below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        // Only try parsing a lambda when the '[' does not follow a closing
        // parenthesis, a type or an identifier. After a closing parenthesis it
        // was probably a pointer to an array: int (*)[].
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Note: 'continue' in a do-while jumps to the !eof() check.
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A semicolon ends the record without a body.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the token that introduced the record to the token types used for its
  // opening and closing brace.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Two levels are added when Style.IndentAccessModifiers is set, one
      // otherwise.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
4027 
parseObjCMethod()4028 void UnwrappedLineParser::parseObjCMethod() {
4029   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4030          "'(' or identifier expected.");
4031   do {
4032     if (FormatTok->is(tok::semi)) {
4033       nextToken();
4034       addUnwrappedLine();
4035       return;
4036     } else if (FormatTok->is(tok::l_brace)) {
4037       if (Style.BraceWrapping.AfterFunction)
4038         addUnwrappedLine();
4039       parseBlock();
4040       addUnwrappedLine();
4041       return;
4042     } else {
4043       nextToken();
4044     }
4045   } while (!eof());
4046 }
4047 
parseObjCProtocolList()4048 void UnwrappedLineParser::parseObjCProtocolList() {
4049   assert(FormatTok->is(tok::less) && "'<' expected.");
4050   do {
4051     nextToken();
4052     // Early exit in case someone forgot a close angle.
4053     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4054         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4055       return;
4056     }
4057   } while (!eof() && FormatTok->isNot(tok::greater));
4058   nextToken(); // Skip '>'.
4059 }
4060 
parseObjCUntilAtEnd()4061 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4062   do {
4063     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4064       nextToken();
4065       addUnwrappedLine();
4066       break;
4067     }
4068     if (FormatTok->is(tok::l_brace)) {
4069       parseBlock();
4070       // In ObjC interfaces, nothing should be following the "}".
4071       addUnwrappedLine();
4072     } else if (FormatTok->is(tok::r_brace)) {
4073       // Ignore stray "}". parseStructuralElement doesn't consume them.
4074       nextToken();
4075       addUnwrappedLine();
4076     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4077       nextToken();
4078       parseObjCMethod();
4079     } else {
4080       parseStructuralElement();
4081     }
4082   } while (!eof());
4083 }
4084 
parseObjCInterfaceOrImplementation()4085 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4086   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4087          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4088   nextToken();
4089   nextToken(); // interface name
4090 
4091   // @interface can be followed by a lightweight generic
4092   // specialization list, then either a base class or a category.
4093   if (FormatTok->is(tok::less))
4094     parseObjCLightweightGenerics();
4095   if (FormatTok->is(tok::colon)) {
4096     nextToken();
4097     nextToken(); // base class name
4098     // The base class can also have lightweight generics applied to it.
4099     if (FormatTok->is(tok::less))
4100       parseObjCLightweightGenerics();
4101   } else if (FormatTok->is(tok::l_paren)) {
4102     // Skip category, if present.
4103     parseParens();
4104   }
4105 
4106   if (FormatTok->is(tok::less))
4107     parseObjCProtocolList();
4108 
4109   if (FormatTok->is(tok::l_brace)) {
4110     if (Style.BraceWrapping.AfterObjCDeclaration)
4111       addUnwrappedLine();
4112     parseBlock(/*MustBeDeclaration=*/true);
4113   }
4114 
4115   // With instance variables, this puts '}' on its own line.  Without instance
4116   // variables, this ends the @interface line.
4117   addUnwrappedLine();
4118 
4119   parseObjCUntilAtEnd();
4120 }
4121 
parseObjCLightweightGenerics()4122 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4123   assert(FormatTok->is(tok::less));
4124   // Unlike protocol lists, generic parameterizations support
4125   // nested angles:
4126   //
4127   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4128   //     NSObject <NSCopying, NSSecureCoding>
4129   //
4130   // so we need to count how many open angles we have left.
4131   unsigned NumOpenAngles = 1;
4132   do {
4133     nextToken();
4134     // Early exit in case someone forgot a close angle.
4135     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4136         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4137       break;
4138     }
4139     if (FormatTok->is(tok::less)) {
4140       ++NumOpenAngles;
4141     } else if (FormatTok->is(tok::greater)) {
4142       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4143       --NumOpenAngles;
4144     }
4145   } while (!eof() && NumOpenAngles != 0);
4146   nextToken(); // Skip '>'.
4147 }
4148 
4149 // Returns true for the declaration/definition form of @protocol,
4150 // false for the expression form.
parseObjCProtocol()4151 bool UnwrappedLineParser::parseObjCProtocol() {
4152   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4153   nextToken();
4154 
4155   if (FormatTok->is(tok::l_paren)) {
4156     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4157     return false;
4158   }
4159 
4160   // The definition/declaration form,
4161   // @protocol Foo
4162   // - (int)someMethod;
4163   // @end
4164 
4165   nextToken(); // protocol name
4166 
4167   if (FormatTok->is(tok::less))
4168     parseObjCProtocolList();
4169 
4170   // Check for protocol declaration.
4171   if (FormatTok->is(tok::semi)) {
4172     nextToken();
4173     addUnwrappedLine();
4174     return true;
4175   }
4176 
4177   addUnwrappedLine();
4178   parseObjCUntilAtEnd();
4179   return true;
4180 }
4181 
parseJavaScriptEs6ImportExport()4182 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4183   bool IsImport = FormatTok->is(Keywords.kw_import);
4184   assert(IsImport || FormatTok->is(tok::kw_export));
4185   nextToken();
4186 
4187   // Consume the "default" in "export default class/function".
4188   if (FormatTok->is(tok::kw_default))
4189     nextToken();
4190 
4191   // Consume "async function", "function" and "default function", so that these
4192   // get parsed as free-standing JS functions, i.e. do not require a trailing
4193   // semicolon.
4194   if (FormatTok->is(Keywords.kw_async))
4195     nextToken();
4196   if (FormatTok->is(Keywords.kw_function)) {
4197     nextToken();
4198     return;
4199   }
4200 
4201   // For imports, `export *`, `export {...}`, consume the rest of the line up
4202   // to the terminating `;`. For everything else, just return and continue
4203   // parsing the structural element, i.e. the declaration or expression for
4204   // `export default`.
4205   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4206       !FormatTok->isStringLiteral() &&
4207       !(FormatTok->is(Keywords.kw_type) &&
4208         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4209     return;
4210   }
4211 
4212   while (!eof()) {
4213     if (FormatTok->is(tok::semi))
4214       return;
4215     if (Line->Tokens.empty()) {
4216       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4217       // import statement should terminate.
4218       return;
4219     }
4220     if (FormatTok->is(tok::l_brace)) {
4221       FormatTok->setBlockKind(BK_Block);
4222       nextToken();
4223       parseBracedList();
4224     } else {
4225       nextToken();
4226     }
4227   }
4228 }
4229 
parseStatementMacro()4230 void UnwrappedLineParser::parseStatementMacro() {
4231   nextToken();
4232   if (FormatTok->is(tok::l_paren))
4233     parseParens();
4234   if (FormatTok->is(tok::semi))
4235     nextToken();
4236   addUnwrappedLine();
4237 }
4238 
parseVerilogHierarchyIdentifier()4239 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4240   // consume things like a::`b.c[d:e] or a::*
4241   while (true) {
4242     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4243                            tok::coloncolon, tok::hash) ||
4244         Keywords.isVerilogIdentifier(*FormatTok)) {
4245       nextToken();
4246     } else if (FormatTok->is(tok::l_square)) {
4247       parseSquare();
4248     } else {
4249       break;
4250     }
4251   }
4252 }
4253 
parseVerilogSensitivityList()4254 void UnwrappedLineParser::parseVerilogSensitivityList() {
4255   if (FormatTok->isNot(tok::at))
4256     return;
4257   nextToken();
4258   // A block event expression has 2 at signs.
4259   if (FormatTok->is(tok::at))
4260     nextToken();
4261   switch (FormatTok->Tok.getKind()) {
4262   case tok::star:
4263     nextToken();
4264     break;
4265   case tok::l_paren:
4266     parseParens();
4267     break;
4268   default:
4269     parseVerilogHierarchyIdentifier();
4270     break;
4271   }
4272 }
4273 
parseVerilogHierarchyHeader()4274 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4275   unsigned AddLevels = 0;
4276 
4277   if (FormatTok->is(Keywords.kw_clocking)) {
4278     nextToken();
4279     if (Keywords.isVerilogIdentifier(*FormatTok))
4280       nextToken();
4281     parseVerilogSensitivityList();
4282     if (FormatTok->is(tok::semi))
4283       nextToken();
4284   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4285                                 Keywords.kw_casez, Keywords.kw_randcase,
4286                                 Keywords.kw_randsequence)) {
4287     if (Style.IndentCaseLabels)
4288       AddLevels++;
4289     nextToken();
4290     if (FormatTok->is(tok::l_paren)) {
4291       FormatTok->setFinalizedType(TT_ConditionLParen);
4292       parseParens();
4293     }
4294     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4295       nextToken();
4296     // The case header has no semicolon.
4297   } else {
4298     // "module" etc.
4299     nextToken();
4300     // all the words like the name of the module and specifiers like
4301     // "automatic" and the width of function return type
4302     while (true) {
4303       if (FormatTok->is(tok::l_square)) {
4304         auto Prev = FormatTok->getPreviousNonComment();
4305         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4306           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4307         parseSquare();
4308       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4309                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4310         nextToken();
4311       } else {
4312         break;
4313       }
4314     }
4315 
4316     auto NewLine = [this]() {
4317       addUnwrappedLine();
4318       Line->IsContinuation = true;
4319     };
4320 
4321     // package imports
4322     while (FormatTok->is(Keywords.kw_import)) {
4323       NewLine();
4324       nextToken();
4325       parseVerilogHierarchyIdentifier();
4326       if (FormatTok->is(tok::semi))
4327         nextToken();
4328     }
4329 
4330     // parameters and ports
4331     if (FormatTok->is(Keywords.kw_verilogHash)) {
4332       NewLine();
4333       nextToken();
4334       if (FormatTok->is(tok::l_paren)) {
4335         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4336         parseParens();
4337       }
4338     }
4339     if (FormatTok->is(tok::l_paren)) {
4340       NewLine();
4341       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4342       parseParens();
4343     }
4344 
4345     // extends and implements
4346     if (FormatTok->is(Keywords.kw_extends)) {
4347       NewLine();
4348       nextToken();
4349       parseVerilogHierarchyIdentifier();
4350       if (FormatTok->is(tok::l_paren))
4351         parseParens();
4352     }
4353     if (FormatTok->is(Keywords.kw_implements)) {
4354       NewLine();
4355       do {
4356         nextToken();
4357         parseVerilogHierarchyIdentifier();
4358       } while (FormatTok->is(tok::comma));
4359     }
4360 
4361     // Coverage event for cover groups.
4362     if (FormatTok->is(tok::at)) {
4363       NewLine();
4364       parseVerilogSensitivityList();
4365     }
4366 
4367     if (FormatTok->is(tok::semi))
4368       nextToken(/*LevelDifference=*/1);
4369     addUnwrappedLine();
4370   }
4371 
4372   return AddLevels;
4373 }
4374 
parseVerilogTable()4375 void UnwrappedLineParser::parseVerilogTable() {
4376   assert(FormatTok->is(Keywords.kw_table));
4377   nextToken(/*LevelDifference=*/1);
4378   addUnwrappedLine();
4379 
4380   auto InitialLevel = Line->Level++;
4381   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4382     FormatToken *Tok = FormatTok;
4383     nextToken();
4384     if (Tok->is(tok::semi))
4385       addUnwrappedLine();
4386     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4387       Tok->setFinalizedType(TT_VerilogTableItem);
4388   }
4389   Line->Level = InitialLevel;
4390   nextToken(/*LevelDifference=*/-1);
4391   addUnwrappedLine();
4392 }
4393 
parseVerilogCaseLabel()4394 void UnwrappedLineParser::parseVerilogCaseLabel() {
4395   // The label will get unindented in AnnotatingParser. If there are no leading
4396   // spaces, indent the rest here so that things inside the block will be
4397   // indented relative to things outside. We don't use parseLabel because we
4398   // don't know whether this colon is a label or a ternary expression at this
4399   // point.
4400   auto OrigLevel = Line->Level;
4401   auto FirstLine = CurrentLines->size();
4402   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4403     ++Line->Level;
4404   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4405     --Line->Level;
4406   parseStructuralElement();
4407   // Restore the indentation in both the new line and the line that has the
4408   // label.
4409   if (CurrentLines->size() > FirstLine)
4410     (*CurrentLines)[FirstLine].Level = OrigLevel;
4411   Line->Level = OrigLevel;
4412 }
4413 
containsExpansion(const UnwrappedLine & Line) const4414 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4415   for (const auto &N : Line.Tokens) {
4416     if (N.Tok->MacroCtx)
4417       return true;
4418     for (const UnwrappedLine &Child : N.Children)
4419       if (containsExpansion(Child))
4420         return true;
4421   }
4422   return false;
4423 }
4424 
addUnwrappedLine(LineLevel AdjustLevel)4425 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4426   if (Line->Tokens.empty())
4427     return;
4428   LLVM_DEBUG({
4429     if (!parsingPPDirective()) {
4430       llvm::dbgs() << "Adding unwrapped line:\n";
4431       printDebugInfo(*Line);
4432     }
4433   });
4434 
4435   // If this line closes a block when in Whitesmiths mode, remember that
4436   // information so that the level can be decreased after the line is added.
4437   // This has to happen after the addition of the line since the line itself
4438   // needs to be indented.
4439   bool ClosesWhitesmithsBlock =
4440       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4441       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4442 
4443   // If the current line was expanded from a macro call, we use it to
4444   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4445   // line and the unexpanded token stream.
4446   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4447     if (!Reconstruct)
4448       Reconstruct.emplace(Line->Level, Unexpanded);
4449     Reconstruct->addLine(*Line);
4450 
4451     // While the reconstructed unexpanded lines are stored in the normal
4452     // flow of lines, the expanded lines are stored on the side to be analyzed
4453     // in an extra step.
4454     CurrentExpandedLines.push_back(std::move(*Line));
4455 
4456     if (Reconstruct->finished()) {
4457       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4458       assert(!Reconstructed.Tokens.empty() &&
4459              "Reconstructed must at least contain the macro identifier.");
4460       assert(!parsingPPDirective());
4461       LLVM_DEBUG({
4462         llvm::dbgs() << "Adding unexpanded line:\n";
4463         printDebugInfo(Reconstructed);
4464       });
4465       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4466       Lines.push_back(std::move(Reconstructed));
4467       CurrentExpandedLines.clear();
4468       Reconstruct.reset();
4469     }
4470   } else {
4471     // At the top level we only get here when no unexpansion is going on, or
4472     // when conditional formatting led to unfinished macro reconstructions.
4473     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4474     CurrentLines->push_back(std::move(*Line));
4475   }
4476   Line->Tokens.clear();
4477   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4478   Line->FirstStartColumn = 0;
4479   Line->IsContinuation = false;
4480   Line->SeenDecltypeAuto = false;
4481 
4482   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4483     --Line->Level;
4484   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4485     CurrentLines->append(
4486         std::make_move_iterator(PreprocessorDirectives.begin()),
4487         std::make_move_iterator(PreprocessorDirectives.end()));
4488     PreprocessorDirectives.clear();
4489   }
4490   // Disconnect the current token from the last token on the previous line.
4491   FormatTok->Previous = nullptr;
4492 }
4493 
eof() const4494 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4495 
isOnNewLine(const FormatToken & FormatTok)4496 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4497   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4498          FormatTok.NewlinesBefore > 0;
4499 }
4500 
4501 // Checks if \p FormatTok is a line comment that continues the line comment
4502 // section on \p Line.
4503 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)4504 continuesLineCommentSection(const FormatToken &FormatTok,
4505                             const UnwrappedLine &Line,
4506                             const llvm::Regex &CommentPragmasRegex) {
4507   if (Line.Tokens.empty())
4508     return false;
4509 
4510   StringRef IndentContent = FormatTok.TokenText;
4511   if (FormatTok.TokenText.starts_with("//") ||
4512       FormatTok.TokenText.starts_with("/*")) {
4513     IndentContent = FormatTok.TokenText.substr(2);
4514   }
4515   if (CommentPragmasRegex.match(IndentContent))
4516     return false;
4517 
4518   // If Line starts with a line comment, then FormatTok continues the comment
4519   // section if its original column is greater or equal to the original start
4520   // column of the line.
4521   //
4522   // Define the min column token of a line as follows: if a line ends in '{' or
4523   // contains a '{' followed by a line comment, then the min column token is
4524   // that '{'. Otherwise, the min column token of the line is the first token of
4525   // the line.
4526   //
4527   // If Line starts with a token other than a line comment, then FormatTok
4528   // continues the comment section if its original column is greater than the
4529   // original start column of the min column token of the line.
4530   //
4531   // For example, the second line comment continues the first in these cases:
4532   //
4533   // // first line
4534   // // second line
4535   //
4536   // and:
4537   //
4538   // // first line
4539   //  // second line
4540   //
4541   // and:
4542   //
4543   // int i; // first line
4544   //  // second line
4545   //
4546   // and:
4547   //
4548   // do { // first line
4549   //      // second line
4550   //   int i;
4551   // } while (true);
4552   //
4553   // and:
4554   //
4555   // enum {
4556   //   a, // first line
4557   //    // second line
4558   //   b
4559   // };
4560   //
4561   // The second line comment doesn't continue the first in these cases:
4562   //
4563   //   // first line
4564   //  // second line
4565   //
4566   // and:
4567   //
4568   // int i; // first line
4569   // // second line
4570   //
4571   // and:
4572   //
4573   // do { // first line
4574   //   // second line
4575   //   int i;
4576   // } while (true);
4577   //
4578   // and:
4579   //
4580   // enum {
4581   //   a, // first line
4582   //   // second line
4583   // };
4584   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4585 
4586   // Scan for '{//'. If found, use the column of '{' as a min column for line
4587   // comment section continuation.
4588   const FormatToken *PreviousToken = nullptr;
4589   for (const UnwrappedLineNode &Node : Line.Tokens) {
4590     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4591         isLineComment(*Node.Tok)) {
4592       MinColumnToken = PreviousToken;
4593       break;
4594     }
4595     PreviousToken = Node.Tok;
4596 
4597     // Grab the last newline preceding a token in this unwrapped line.
4598     if (Node.Tok->NewlinesBefore > 0)
4599       MinColumnToken = Node.Tok;
4600   }
4601   if (PreviousToken && PreviousToken->is(tok::l_brace))
4602     MinColumnToken = PreviousToken;
4603 
4604   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4605                               MinColumnToken);
4606 }
4607 
flushComments(bool NewlineBeforeNext)4608 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4609   bool JustComments = Line->Tokens.empty();
4610   for (FormatToken *Tok : CommentsBeforeNextToken) {
4611     // Line comments that belong to the same line comment section are put on the
4612     // same line since later we might want to reflow content between them.
4613     // Additional fine-grained breaking of line comment sections is controlled
4614     // by the class BreakableLineCommentSection in case it is desirable to keep
4615     // several line comment sections in the same unwrapped line.
4616     //
4617     // FIXME: Consider putting separate line comment sections as children to the
4618     // unwrapped line instead.
4619     Tok->ContinuesLineCommentSection =
4620         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4621     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4622       addUnwrappedLine();
4623     pushToken(Tok);
4624   }
4625   if (NewlineBeforeNext && JustComments)
4626     addUnwrappedLine();
4627   CommentsBeforeNextToken.clear();
4628 }
4629 
nextToken(int LevelDifference)4630 void UnwrappedLineParser::nextToken(int LevelDifference) {
4631   if (eof())
4632     return;
4633   flushComments(isOnNewLine(*FormatTok));
4634   pushToken(FormatTok);
4635   FormatToken *Previous = FormatTok;
4636   if (!Style.isJavaScript())
4637     readToken(LevelDifference);
4638   else
4639     readTokenWithJavaScriptASI();
4640   FormatTok->Previous = Previous;
4641   if (Style.isVerilog()) {
4642     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4643     // keywords like `begin`, we can't treat them the same as left braces
4644     // because some contexts require one of them.  For example structs use
4645     // braces and if blocks use keywords, and a left brace can occur in an if
4646     // statement, but it is not a block.  For keywords like `end`, we simply
4647     // treat them the same as right braces.
4648     if (Keywords.isVerilogEnd(*FormatTok))
4649       FormatTok->Tok.setKind(tok::r_brace);
4650   }
4651 }
4652 
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)4653 void UnwrappedLineParser::distributeComments(
4654     const SmallVectorImpl<FormatToken *> &Comments,
4655     const FormatToken *NextTok) {
4656   // Whether or not a line comment token continues a line is controlled by
4657   // the method continuesLineCommentSection, with the following caveat:
4658   //
4659   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4660   // that each comment line from the trail is aligned with the next token, if
4661   // the next token exists. If a trail exists, the beginning of the maximal
4662   // trail is marked as a start of a new comment section.
4663   //
4664   // For example in this code:
4665   //
4666   // int a; // line about a
4667   //   // line 1 about b
4668   //   // line 2 about b
4669   //   int b;
4670   //
4671   // the two lines about b form a maximal trail, so there are two sections, the
4672   // first one consisting of the single comment "// line about a" and the
4673   // second one consisting of the next two comments.
4674   if (Comments.empty())
4675     return;
4676   bool ShouldPushCommentsInCurrentLine = true;
4677   bool HasTrailAlignedWithNextToken = false;
4678   unsigned StartOfTrailAlignedWithNextToken = 0;
4679   if (NextTok) {
4680     // We are skipping the first element intentionally.
4681     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4682       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4683         HasTrailAlignedWithNextToken = true;
4684         StartOfTrailAlignedWithNextToken = i;
4685       }
4686     }
4687   }
4688   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4689     FormatToken *FormatTok = Comments[i];
4690     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4691       FormatTok->ContinuesLineCommentSection = false;
4692     } else {
4693       FormatTok->ContinuesLineCommentSection =
4694           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4695     }
4696     if (!FormatTok->ContinuesLineCommentSection &&
4697         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4698       ShouldPushCommentsInCurrentLine = false;
4699     }
4700     if (ShouldPushCommentsInCurrentLine)
4701       pushToken(FormatTok);
4702     else
4703       CommentsBeforeNextToken.push_back(FormatTok);
4704   }
4705 }
4706 
readToken(int LevelDifference)4707 void UnwrappedLineParser::readToken(int LevelDifference) {
4708   SmallVector<FormatToken *, 1> Comments;
4709   bool PreviousWasComment = false;
4710   bool FirstNonCommentOnLine = false;
4711   do {
4712     FormatTok = Tokens->getNextToken();
4713     assert(FormatTok);
4714     while (FormatTok->getType() == TT_ConflictStart ||
4715            FormatTok->getType() == TT_ConflictEnd ||
4716            FormatTok->getType() == TT_ConflictAlternative) {
4717       if (FormatTok->getType() == TT_ConflictStart)
4718         conditionalCompilationStart(/*Unreachable=*/false);
4719       else if (FormatTok->getType() == TT_ConflictAlternative)
4720         conditionalCompilationAlternative();
4721       else if (FormatTok->getType() == TT_ConflictEnd)
4722         conditionalCompilationEnd();
4723       FormatTok = Tokens->getNextToken();
4724       FormatTok->MustBreakBefore = true;
4725       FormatTok->MustBreakBeforeFinalized = true;
4726     }
4727 
4728     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4729                                       const FormatToken &Tok,
4730                                       bool PreviousWasComment) {
4731       auto IsFirstOnLine = [](const FormatToken &Tok) {
4732         return Tok.HasUnescapedNewline || Tok.IsFirst;
4733       };
4734 
4735       // Consider preprocessor directives preceded by block comments as first
4736       // on line.
4737       if (PreviousWasComment)
4738         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4739       return IsFirstOnLine(Tok);
4740     };
4741 
4742     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4743         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4744     PreviousWasComment = FormatTok->is(tok::comment);
4745 
4746     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4747            (!Style.isVerilog() ||
4748             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4749            FirstNonCommentOnLine) {
4750       distributeComments(Comments, FormatTok);
4751       Comments.clear();
4752       // If there is an unfinished unwrapped line, we flush the preprocessor
4753       // directives only after that unwrapped line was finished later.
4754       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4755       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4756       assert((LevelDifference >= 0 ||
4757               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4758              "LevelDifference makes Line->Level negative");
4759       Line->Level += LevelDifference;
4760       // Comments stored before the preprocessor directive need to be output
4761       // before the preprocessor directive, at the same level as the
4762       // preprocessor directive, as we consider them to apply to the directive.
4763       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4764           PPBranchLevel > 0) {
4765         Line->Level += PPBranchLevel;
4766       }
4767       flushComments(isOnNewLine(*FormatTok));
4768       parsePPDirective();
4769       PreviousWasComment = FormatTok->is(tok::comment);
4770       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4771           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4772     }
4773 
4774     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4775         !Line->InPPDirective) {
4776       continue;
4777     }
4778 
4779     if (FormatTok->is(tok::identifier) &&
4780         Macros.defined(FormatTok->TokenText) &&
4781         // FIXME: Allow expanding macros in preprocessor directives.
4782         !Line->InPPDirective) {
4783       FormatToken *ID = FormatTok;
4784       unsigned Position = Tokens->getPosition();
4785 
4786       // To correctly parse the code, we need to replace the tokens of the macro
4787       // call with its expansion.
4788       auto PreCall = std::move(Line);
4789       Line.reset(new UnwrappedLine);
4790       bool OldInExpansion = InExpansion;
4791       InExpansion = true;
4792       // We parse the macro call into a new line.
4793       auto Args = parseMacroCall();
4794       InExpansion = OldInExpansion;
4795       assert(Line->Tokens.front().Tok == ID);
4796       // And remember the unexpanded macro call tokens.
4797       auto UnexpandedLine = std::move(Line);
4798       // Reset to the old line.
4799       Line = std::move(PreCall);
4800 
4801       LLVM_DEBUG({
4802         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4803         if (Args) {
4804           llvm::dbgs() << "(";
4805           for (const auto &Arg : Args.value())
4806             for (const auto &T : Arg)
4807               llvm::dbgs() << T->TokenText << " ";
4808           llvm::dbgs() << ")";
4809         }
4810         llvm::dbgs() << "\n";
4811       });
4812       if (Macros.objectLike(ID->TokenText) && Args &&
4813           !Macros.hasArity(ID->TokenText, Args->size())) {
4814         // The macro is either
4815         // - object-like, but we got argumnets, or
4816         // - overloaded to be both object-like and function-like, but none of
4817         //   the function-like arities match the number of arguments.
4818         // Thus, expand as object-like macro.
4819         LLVM_DEBUG(llvm::dbgs()
4820                    << "Macro \"" << ID->TokenText
4821                    << "\" not overloaded for arity " << Args->size()
4822                    << "or not function-like, using object-like overload.");
4823         Args.reset();
4824         UnexpandedLine->Tokens.resize(1);
4825         Tokens->setPosition(Position);
4826         nextToken();
4827         assert(!Args && Macros.objectLike(ID->TokenText));
4828       }
4829       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4830           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4831         // Next, we insert the expanded tokens in the token stream at the
4832         // current position, and continue parsing.
4833         Unexpanded[ID] = std::move(UnexpandedLine);
4834         SmallVector<FormatToken *, 8> Expansion =
4835             Macros.expand(ID, std::move(Args));
4836         if (!Expansion.empty())
4837           FormatTok = Tokens->insertTokens(Expansion);
4838 
4839         LLVM_DEBUG({
4840           llvm::dbgs() << "Expanded: ";
4841           for (const auto &T : Expansion)
4842             llvm::dbgs() << T->TokenText << " ";
4843           llvm::dbgs() << "\n";
4844         });
4845       } else {
4846         LLVM_DEBUG({
4847           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4848                        << "\", because it was used ";
4849           if (Args)
4850             llvm::dbgs() << "with " << Args->size();
4851           else
4852             llvm::dbgs() << "without";
4853           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4854         });
4855         Tokens->setPosition(Position);
4856         FormatTok = ID;
4857       }
4858     }
4859 
4860     if (FormatTok->isNot(tok::comment)) {
4861       distributeComments(Comments, FormatTok);
4862       Comments.clear();
4863       return;
4864     }
4865 
4866     Comments.push_back(FormatTok);
4867   } while (!eof());
4868 
4869   distributeComments(Comments, nullptr);
4870   Comments.clear();
4871 }
4872 
4873 namespace {
4874 template <typename Iterator>
pushTokens(Iterator Begin,Iterator End,llvm::SmallVectorImpl<FormatToken * > & Into)4875 void pushTokens(Iterator Begin, Iterator End,
4876                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4877   for (auto I = Begin; I != End; ++I) {
4878     Into.push_back(I->Tok);
4879     for (const auto &Child : I->Children)
4880       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4881   }
4882 }
4883 } // namespace
4884 
4885 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
parseMacroCall()4886 UnwrappedLineParser::parseMacroCall() {
4887   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4888   assert(Line->Tokens.empty());
4889   nextToken();
4890   if (FormatTok->isNot(tok::l_paren))
4891     return Args;
4892   unsigned Position = Tokens->getPosition();
4893   FormatToken *Tok = FormatTok;
4894   nextToken();
4895   Args.emplace();
4896   auto ArgStart = std::prev(Line->Tokens.end());
4897 
4898   int Parens = 0;
4899   do {
4900     switch (FormatTok->Tok.getKind()) {
4901     case tok::l_paren:
4902       ++Parens;
4903       nextToken();
4904       break;
4905     case tok::r_paren: {
4906       if (Parens > 0) {
4907         --Parens;
4908         nextToken();
4909         break;
4910       }
4911       Args->push_back({});
4912       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4913       nextToken();
4914       return Args;
4915     }
4916     case tok::comma: {
4917       if (Parens > 0) {
4918         nextToken();
4919         break;
4920       }
4921       Args->push_back({});
4922       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4923       nextToken();
4924       ArgStart = std::prev(Line->Tokens.end());
4925       break;
4926     }
4927     default:
4928       nextToken();
4929       break;
4930     }
4931   } while (!eof());
4932   Line->Tokens.resize(1);
4933   Tokens->setPosition(Position);
4934   FormatTok = Tok;
4935   return {};
4936 }
4937 
pushToken(FormatToken * Tok)4938 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4939   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4940   if (MustBreakBeforeNextToken) {
4941     Line->Tokens.back().Tok->MustBreakBefore = true;
4942     Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
4943     MustBreakBeforeNextToken = false;
4944   }
4945 }
4946 
4947 } // end namespace format
4948 } // end namespace clang
4949