1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "["
51        << "T=" << (unsigned)I->Tok->getType()
52        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53        << "\"] ";
54     for (SmallVectorImpl<UnwrappedLine>::const_iterator
55              CI = I->Children.begin(),
56              CE = I->Children.end();
57          CI != CE; ++CI) {
58       OS << "\n";
59       printLine(OS, *CI, (Prefix + "  ").str());
60       NewLine = true;
61     }
62   }
63   if (!NewLine)
64     OS << "\n";
65 }
66 
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68   printLine(llvm::dbgs(), Line);
69 }
70 
71 class ScopedDeclarationState {
72 public:
73   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74                          bool MustBeDeclaration)
75       : Line(Line), Stack(Stack) {
76     Line.MustBeDeclaration = MustBeDeclaration;
77     Stack.push_back(MustBeDeclaration);
78   }
79   ~ScopedDeclarationState() {
80     Stack.pop_back();
81     if (!Stack.empty())
82       Line.MustBeDeclaration = Stack.back();
83     else
84       Line.MustBeDeclaration = true;
85   }
86 
87 private:
88   UnwrappedLine &Line;
89   llvm::BitVector &Stack;
90 };
91 
92 } // end anonymous namespace
93 
94 class ScopedLineState {
95 public:
96   ScopedLineState(UnwrappedLineParser &Parser,
97                   bool SwitchToPreprocessorLines = false)
98       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99     if (SwitchToPreprocessorLines)
100       Parser.CurrentLines = &Parser.PreprocessorDirectives;
101     else if (!Parser.Line->Tokens.empty())
102       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103     PreBlockLine = std::move(Parser.Line);
104     Parser.Line = std::make_unique<UnwrappedLine>();
105     Parser.Line->Level = PreBlockLine->Level;
106     Parser.Line->PPLevel = PreBlockLine->PPLevel;
107     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
109   }
110 
111   ~ScopedLineState() {
112     if (!Parser.Line->Tokens.empty())
113       Parser.addUnwrappedLine();
114     assert(Parser.Line->Tokens.empty());
115     Parser.Line = std::move(PreBlockLine);
116     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117       Parser.MustBreakBeforeNextToken = true;
118     Parser.CurrentLines = OriginalLines;
119   }
120 
121 private:
122   UnwrappedLineParser &Parser;
123 
124   std::unique_ptr<UnwrappedLine> PreBlockLine;
125   SmallVectorImpl<UnwrappedLine> *OriginalLines;
126 };
127 
128 class CompoundStatementIndenter {
129 public:
130   CompoundStatementIndenter(UnwrappedLineParser *Parser,
131                             const FormatStyle &Style, unsigned &LineLevel)
132       : CompoundStatementIndenter(Parser, LineLevel,
133                                   Style.BraceWrapping.AfterControlStatement,
134                                   Style.BraceWrapping.IndentBraces) {}
135   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
136                             bool WrapBrace, bool IndentBrace)
137       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138     if (WrapBrace)
139       Parser->addUnwrappedLine();
140     if (IndentBrace)
141       ++LineLevel;
142   }
143   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
144 
145 private:
146   unsigned &LineLevel;
147   unsigned OldLineLevel;
148 };
149 
150 UnwrappedLineParser::UnwrappedLineParser(
151     SourceManager &SourceMgr, const FormatStyle &Style,
152     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
153     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
154     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155     IdentifierTable &IdentTable)
156     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161                        ? IG_Rejected
162                        : IG_Inited),
163       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
165 
166 void UnwrappedLineParser::reset() {
167   PPBranchLevel = -1;
168   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169                      ? IG_Rejected
170                      : IG_Inited;
171   IncludeGuardToken = nullptr;
172   Line.reset(new UnwrappedLine);
173   CommentsBeforeNextToken.clear();
174   FormatTok = nullptr;
175   MustBreakBeforeNextToken = false;
176   IsDecltypeAutoFunction = false;
177   PreprocessorDirectives.clear();
178   CurrentLines = &Lines;
179   DeclarationScopeStack.clear();
180   NestedTooDeep.clear();
181   NestedLambdas.clear();
182   PPStack.clear();
183   Line->FirstStartColumn = FirstStartColumn;
184 
185   if (!Unexpanded.empty())
186     for (FormatToken *Token : AllTokens)
187       Token->MacroCtx.reset();
188   CurrentExpandedLines.clear();
189   ExpandedLines.clear();
190   Unexpanded.clear();
191   InExpansion = false;
192   Reconstruct.reset();
193 }
194 
195 void UnwrappedLineParser::parse() {
196   IndexedTokenSource TokenSource(AllTokens);
197   Line->FirstStartColumn = FirstStartColumn;
198   do {
199     LLVM_DEBUG(llvm::dbgs() << "----\n");
200     reset();
201     Tokens = &TokenSource;
202     TokenSource.reset();
203 
204     readToken();
205     parseFile();
206 
207     // If we found an include guard then all preprocessor directives (other than
208     // the guard) are over-indented by one.
209     if (IncludeGuard == IG_Found) {
210       for (auto &Line : Lines)
211         if (Line.InPPDirective && Line.Level > 0)
212           --Line.Level;
213     }
214 
215     // Create line with eof token.
216     assert(FormatTok->is(tok::eof));
217     pushToken(FormatTok);
218     addUnwrappedLine();
219 
220     // In a first run, format everything with the lines containing macro calls
221     // replaced by the expansion.
222     if (!ExpandedLines.empty()) {
223       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
224       for (const auto &Line : Lines) {
225         if (!Line.Tokens.empty()) {
226           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
227           if (it != ExpandedLines.end()) {
228             for (const auto &Expanded : it->second) {
229               LLVM_DEBUG(printDebugInfo(Expanded));
230               Callback.consumeUnwrappedLine(Expanded);
231             }
232             continue;
233           }
234         }
235         LLVM_DEBUG(printDebugInfo(Line));
236         Callback.consumeUnwrappedLine(Line);
237       }
238       Callback.finishRun();
239     }
240 
241     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
242     for (const UnwrappedLine &Line : Lines) {
243       LLVM_DEBUG(printDebugInfo(Line));
244       Callback.consumeUnwrappedLine(Line);
245     }
246     Callback.finishRun();
247     Lines.clear();
248     while (!PPLevelBranchIndex.empty() &&
249            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
250       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
251       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
252     }
253     if (!PPLevelBranchIndex.empty()) {
254       ++PPLevelBranchIndex.back();
255       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
256       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
257     }
258   } while (!PPLevelBranchIndex.empty());
259 }
260 
261 void UnwrappedLineParser::parseFile() {
262   // The top-level context in a file always has declarations, except for pre-
263   // processor directives and JavaScript files.
264   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
265   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
266                                           MustBeDeclaration);
267   if (Style.Language == FormatStyle::LK_TextProto)
268     parseBracedList();
269   else
270     parseLevel();
271   // Make sure to format the remaining tokens.
272   //
273   // LK_TextProto is special since its top-level is parsed as the body of a
274   // braced list, which does not necessarily have natural line separators such
275   // as a semicolon. Comments after the last entry that have been determined to
276   // not belong to that line, as in:
277   //   key: value
278   //   // endfile comment
279   // do not have a chance to be put on a line of their own until this point.
280   // Here we add this newline before end-of-file comments.
281   if (Style.Language == FormatStyle::LK_TextProto &&
282       !CommentsBeforeNextToken.empty()) {
283     addUnwrappedLine();
284   }
285   flushComments(true);
286   addUnwrappedLine();
287 }
288 
289 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
290   do {
291     switch (FormatTok->Tok.getKind()) {
292     case tok::l_brace:
293       return;
294     default:
295       if (FormatTok->is(Keywords.kw_where)) {
296         addUnwrappedLine();
297         nextToken();
298         parseCSharpGenericTypeConstraint();
299         break;
300       }
301       nextToken();
302       break;
303     }
304   } while (!eof());
305 }
306 
307 void UnwrappedLineParser::parseCSharpAttribute() {
308   int UnpairedSquareBrackets = 1;
309   do {
310     switch (FormatTok->Tok.getKind()) {
311     case tok::r_square:
312       nextToken();
313       --UnpairedSquareBrackets;
314       if (UnpairedSquareBrackets == 0) {
315         addUnwrappedLine();
316         return;
317       }
318       break;
319     case tok::l_square:
320       ++UnpairedSquareBrackets;
321       nextToken();
322       break;
323     default:
324       nextToken();
325       break;
326     }
327   } while (!eof());
328 }
329 
330 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331   if (!Lines.empty() && Lines.back().InPPDirective)
332     return true;
333 
334   const FormatToken *Previous = Tokens->getPreviousToken();
335   return Previous && Previous->is(tok::comment) &&
336          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337 }
338 
339 /// \brief Parses a level, that is ???.
340 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
341 /// \param CanContainBracedList If the content can contain (at any level) a
342 /// braced list.
343 /// \param NextLBracesType The type for left brace found in this level.
344 /// \param IfKind The \p if statement kind in the level.
345 /// \param IfLeftBrace The left brace of the \p if block in the level.
346 /// \returns true if a simple block of if/else/for/while, or false otherwise.
347 /// (A simple block has a single statement.)
348 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
349                                      bool CanContainBracedList,
350                                      TokenType NextLBracesType,
351                                      IfStmtKind *IfKind,
352                                      FormatToken **IfLeftBrace) {
353   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
354                                   ? TT_BracedListLBrace
355                                   : TT_Unknown;
356   const bool IsPrecededByCommentOrPPDirective =
357       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
358   FormatToken *IfLBrace = nullptr;
359   bool HasDoWhile = false;
360   bool HasLabel = false;
361   unsigned StatementCount = 0;
362   bool SwitchLabelEncountered = false;
363 
364   do {
365     if (FormatTok->getType() == TT_AttributeMacro) {
366       nextToken();
367       continue;
368     }
369     tok::TokenKind kind = FormatTok->Tok.getKind();
370     if (FormatTok->getType() == TT_MacroBlockBegin)
371       kind = tok::l_brace;
372     else if (FormatTok->getType() == TT_MacroBlockEnd)
373       kind = tok::r_brace;
374 
375     auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
376                          &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
377       parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
378                              &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
379                              HasLabel ? nullptr : &HasLabel);
380       ++StatementCount;
381       assert(StatementCount > 0 && "StatementCount overflow!");
382     };
383 
384     switch (kind) {
385     case tok::comment:
386       nextToken();
387       addUnwrappedLine();
388       break;
389     case tok::l_brace:
390       if (NextLBracesType != TT_Unknown) {
391         FormatTok->setFinalizedType(NextLBracesType);
392       } else if (FormatTok->Previous &&
393                  FormatTok->Previous->ClosesRequiresClause) {
394         // We need the 'default' case here to correctly parse a function
395         // l_brace.
396         ParseDefault();
397         continue;
398       }
399       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
400           tryToParseBracedList()) {
401         continue;
402       }
403       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
404                  /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
405                  /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
406                  NextLBracesType);
407       ++StatementCount;
408       assert(StatementCount > 0 && "StatementCount overflow!");
409       addUnwrappedLine();
410       break;
411     case tok::r_brace:
412       if (OpeningBrace) {
413         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
414             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
415           return false;
416         }
417         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
418             HasDoWhile || IsPrecededByCommentOrPPDirective ||
419             precededByCommentOrPPDirective()) {
420           return false;
421         }
422         const FormatToken *Next = Tokens->peekNextToken();
423         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
424           return false;
425         if (IfLeftBrace)
426           *IfLeftBrace = IfLBrace;
427         return true;
428       }
429       nextToken();
430       addUnwrappedLine();
431       break;
432     case tok::kw_default: {
433       unsigned StoredPosition = Tokens->getPosition();
434       FormatToken *Next;
435       do {
436         Next = Tokens->getNextToken();
437         assert(Next);
438       } while (Next->is(tok::comment));
439       FormatTok = Tokens->setPosition(StoredPosition);
440       if (Next->isNot(tok::colon)) {
441         // default not followed by ':' is not a case label; treat it like
442         // an identifier.
443         parseStructuralElement();
444         break;
445       }
446       // Else, if it is 'default:', fall through to the case handling.
447       [[fallthrough]];
448     }
449     case tok::kw_case:
450       if (Style.isProto() || Style.isVerilog() ||
451           (Style.isJavaScript() && Line->MustBeDeclaration)) {
452         // Proto: there are no switch/case statements
453         // Verilog: Case labels don't have this word. We handle case
454         // labels including default in TokenAnnotator.
455         // JavaScript: A 'case: string' style field declaration.
456         ParseDefault();
457         break;
458       }
459       if (!SwitchLabelEncountered &&
460           (Style.IndentCaseLabels ||
461            (Line->InPPDirective && Line->Level == 1))) {
462         ++Line->Level;
463       }
464       SwitchLabelEncountered = true;
465       parseStructuralElement();
466       break;
467     case tok::l_square:
468       if (Style.isCSharp()) {
469         nextToken();
470         parseCSharpAttribute();
471         break;
472       }
473       if (handleCppAttributes())
474         break;
475       [[fallthrough]];
476     default:
477       ParseDefault();
478       break;
479     }
480   } while (!eof());
481 
482   return false;
483 }
484 
485 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
486   // We'll parse forward through the tokens until we hit
487   // a closing brace or eof - note that getNextToken() will
488   // parse macros, so this will magically work inside macro
489   // definitions, too.
490   unsigned StoredPosition = Tokens->getPosition();
491   FormatToken *Tok = FormatTok;
492   const FormatToken *PrevTok = Tok->Previous;
493   // Keep a stack of positions of lbrace tokens. We will
494   // update information about whether an lbrace starts a
495   // braced init list or a different block during the loop.
496   struct StackEntry {
497     FormatToken *Tok;
498     const FormatToken *PrevTok;
499   };
500   SmallVector<StackEntry, 8> LBraceStack;
501   assert(Tok->is(tok::l_brace));
502   do {
503     // Get next non-comment token.
504     FormatToken *NextTok;
505     do {
506       NextTok = Tokens->getNextToken();
507     } while (NextTok->is(tok::comment));
508 
509     switch (Tok->Tok.getKind()) {
510     case tok::l_brace:
511       if (Style.isJavaScript() && PrevTok) {
512         if (PrevTok->isOneOf(tok::colon, tok::less)) {
513           // A ':' indicates this code is in a type, or a braced list
514           // following a label in an object literal ({a: {b: 1}}).
515           // A '<' could be an object used in a comparison, but that is nonsense
516           // code (can never return true), so more likely it is a generic type
517           // argument (`X<{a: string; b: number}>`).
518           // The code below could be confused by semicolons between the
519           // individual members in a type member list, which would normally
520           // trigger BK_Block. In both cases, this must be parsed as an inline
521           // braced init.
522           Tok->setBlockKind(BK_BracedInit);
523         } else if (PrevTok->is(tok::r_paren)) {
524           // `) { }` can only occur in function or method declarations in JS.
525           Tok->setBlockKind(BK_Block);
526         }
527       } else {
528         Tok->setBlockKind(BK_Unknown);
529       }
530       LBraceStack.push_back({Tok, PrevTok});
531       break;
532     case tok::r_brace:
533       if (LBraceStack.empty())
534         break;
535       if (LBraceStack.back().Tok->is(BK_Unknown)) {
536         bool ProbablyBracedList = false;
537         if (Style.Language == FormatStyle::LK_Proto) {
538           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
539         } else {
540           // Skip NextTok over preprocessor lines, otherwise we may not
541           // properly diagnose the block as a braced intializer
542           // if the comma separator appears after the pp directive.
543           while (NextTok->is(tok::hash)) {
544             ScopedMacroState MacroState(*Line, Tokens, NextTok);
545             do {
546               NextTok = Tokens->getNextToken();
547             } while (NextTok->isNot(tok::eof));
548           }
549 
550           // Using OriginalColumn to distinguish between ObjC methods and
551           // binary operators is a bit hacky.
552           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
553                                   NextTok->OriginalColumn == 0;
554 
555           // Try to detect a braced list. Note that regardless how we mark inner
556           // braces here, we will overwrite the BlockKind later if we parse a
557           // braced list (where all blocks inside are by default braced lists),
558           // or when we explicitly detect blocks (for example while parsing
559           // lambdas).
560 
561           // If we already marked the opening brace as braced list, the closing
562           // must also be part of it.
563           ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
564 
565           ProbablyBracedList = ProbablyBracedList ||
566                                (Style.isJavaScript() &&
567                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
568                                                  Keywords.kw_as));
569           ProbablyBracedList = ProbablyBracedList ||
570                                (Style.isCpp() && NextTok->is(tok::l_paren));
571 
572           // If there is a comma, semicolon or right paren after the closing
573           // brace, we assume this is a braced initializer list.
574           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
575           // braced list in JS.
576           ProbablyBracedList =
577               ProbablyBracedList ||
578               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
579                                tok::r_paren, tok::r_square, tok::ellipsis);
580 
581           // Distinguish between braced list in a constructor initializer list
582           // followed by constructor body, or just adjacent blocks.
583           ProbablyBracedList =
584               ProbablyBracedList ||
585               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
586                LBraceStack.back().PrevTok->isOneOf(tok::identifier,
587                                                    tok::greater));
588 
589           ProbablyBracedList =
590               ProbablyBracedList ||
591               (NextTok->is(tok::identifier) &&
592                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
593 
594           ProbablyBracedList = ProbablyBracedList ||
595                                (NextTok->is(tok::semi) &&
596                                 (!ExpectClassBody || LBraceStack.size() != 1));
597 
598           ProbablyBracedList =
599               ProbablyBracedList ||
600               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
601 
602           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
603             // We can have an array subscript after a braced init
604             // list, but C++11 attributes are expected after blocks.
605             NextTok = Tokens->getNextToken();
606             ProbablyBracedList = NextTok->isNot(tok::l_square);
607           }
608         }
609         if (ProbablyBracedList) {
610           Tok->setBlockKind(BK_BracedInit);
611           LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
612         } else {
613           Tok->setBlockKind(BK_Block);
614           LBraceStack.back().Tok->setBlockKind(BK_Block);
615         }
616       }
617       LBraceStack.pop_back();
618       break;
619     case tok::identifier:
620       if (!Tok->is(TT_StatementMacro))
621         break;
622       [[fallthrough]];
623     case tok::at:
624     case tok::semi:
625     case tok::kw_if:
626     case tok::kw_while:
627     case tok::kw_for:
628     case tok::kw_switch:
629     case tok::kw_try:
630     case tok::kw___try:
631       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
632         LBraceStack.back().Tok->setBlockKind(BK_Block);
633       break;
634     default:
635       break;
636     }
637     PrevTok = Tok;
638     Tok = NextTok;
639   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
640 
641   // Assume other blocks for all unclosed opening braces.
642   for (const auto &Entry : LBraceStack)
643     if (Entry.Tok->is(BK_Unknown))
644       Entry.Tok->setBlockKind(BK_Block);
645 
646   FormatTok = Tokens->setPosition(StoredPosition);
647 }
648 
/// Mixes the \c std::hash of \p v into \p seed, using the same mixing formula
/// as the widely used Boost \c hash_combine.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
654 
655 size_t UnwrappedLineParser::computePPHash() const {
656   size_t h = 0;
657   for (const auto &i : PPStack) {
658     hash_combine(h, size_t(i.Kind));
659     hash_combine(h, i.Line);
660   }
661   return h;
662 }
663 
664 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
665 // is not null, subtracts its length (plus the preceding space) when computing
666 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
667 // running the token annotator on it so that we can restore them afterward.
668 bool UnwrappedLineParser::mightFitOnOneLine(
669     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
670   const auto ColumnLimit = Style.ColumnLimit;
671   if (ColumnLimit == 0)
672     return true;
673 
674   auto &Tokens = ParsedLine.Tokens;
675   assert(!Tokens.empty());
676 
677   const auto *LastToken = Tokens.back().Tok;
678   assert(LastToken);
679 
680   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
681 
682   int Index = 0;
683   for (const auto &Token : Tokens) {
684     assert(Token.Tok);
685     auto &SavedToken = SavedTokens[Index++];
686     SavedToken.Tok = new FormatToken;
687     SavedToken.Tok->copyFrom(*Token.Tok);
688     SavedToken.Children = std::move(Token.Children);
689   }
690 
691   AnnotatedLine Line(ParsedLine);
692   assert(Line.Last == LastToken);
693 
694   TokenAnnotator Annotator(Style, Keywords);
695   Annotator.annotate(Line);
696   Annotator.calculateFormattingInformation(Line);
697 
698   auto Length = LastToken->TotalLength;
699   if (OpeningBrace) {
700     assert(OpeningBrace != Tokens.front().Tok);
701     if (auto Prev = OpeningBrace->Previous;
702         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
703       Length -= ColumnLimit;
704     }
705     Length -= OpeningBrace->TokenText.size() + 1;
706   }
707 
708   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
709     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
710     Length -= FirstToken->TokenText.size() + 1;
711   }
712 
713   Index = 0;
714   for (auto &Token : Tokens) {
715     const auto &SavedToken = SavedTokens[Index++];
716     Token.Tok->copyFrom(*SavedToken.Tok);
717     Token.Children = std::move(SavedToken.Children);
718     delete SavedToken.Tok;
719   }
720 
721   // If these change PPLevel needs to be used for get correct indentation.
722   assert(!Line.InMacroBody);
723   assert(!Line.InPPDirective);
724   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
725 }
726 
727 FormatToken *UnwrappedLineParser::parseBlock(
728     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
729     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
730     bool CanContainBracedList, TokenType NextLBracesType) {
731   auto HandleVerilogBlockLabel = [this]() {
732     // ":" name
733     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
734       nextToken();
735       if (Keywords.isVerilogIdentifier(*FormatTok))
736         nextToken();
737     }
738   };
739 
740   // Whether this is a Verilog-specific block that has a special header like a
741   // module.
742   const bool VerilogHierarchy =
743       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
744   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
745           (Style.isVerilog() &&
746            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
747          "'{' or macro block token expected");
748   FormatToken *Tok = FormatTok;
749   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
750   auto Index = CurrentLines->size();
751   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
752   FormatTok->setBlockKind(BK_Block);
753 
754   // For Whitesmiths mode, jump to the next level prior to skipping over the
755   // braces.
756   if (!VerilogHierarchy && AddLevels > 0 &&
757       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
758     ++Line->Level;
759   }
760 
761   size_t PPStartHash = computePPHash();
762 
763   const unsigned InitialLevel = Line->Level;
764   if (VerilogHierarchy) {
765     AddLevels += parseVerilogHierarchyHeader();
766   } else {
767     nextToken(/*LevelDifference=*/AddLevels);
768     HandleVerilogBlockLabel();
769   }
770 
771   // Bail out if there are too many levels. Otherwise, the stack might overflow.
772   if (Line->Level > 300)
773     return nullptr;
774 
775   if (MacroBlock && FormatTok->is(tok::l_paren))
776     parseParens();
777 
778   size_t NbPreprocessorDirectives =
779       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
780   addUnwrappedLine();
781   size_t OpeningLineIndex =
782       CurrentLines->empty()
783           ? (UnwrappedLine::kInvalidIndex)
784           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
785 
786   // Whitesmiths is weird here. The brace needs to be indented for the namespace
787   // block, but the block itself may not be indented depending on the style
788   // settings. This allows the format to back up one level in those cases.
789   if (UnindentWhitesmithsBraces)
790     --Line->Level;
791 
792   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
793                                           MustBeDeclaration);
794   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
795     Line->Level += AddLevels;
796 
797   FormatToken *IfLBrace = nullptr;
798   const bool SimpleBlock =
799       parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
800 
801   if (eof())
802     return IfLBrace;
803 
804   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
805                  : !FormatTok->is(tok::r_brace)) {
806     Line->Level = InitialLevel;
807     FormatTok->setBlockKind(BK_Block);
808     return IfLBrace;
809   }
810 
811   const bool IsFunctionRBrace =
812       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
813 
814   auto RemoveBraces = [=]() mutable {
815     if (!SimpleBlock)
816       return false;
817     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
818     assert(FormatTok->is(tok::r_brace));
819     const bool WrappedOpeningBrace = !Tok->Previous;
820     if (WrappedOpeningBrace && FollowedByComment)
821       return false;
822     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
823     if (KeepBraces && !HasRequiredIfBraces)
824       return false;
825     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
826       const FormatToken *Previous = Tokens->getPreviousToken();
827       assert(Previous);
828       if (Previous->is(tok::r_brace) && !Previous->Optional)
829         return false;
830     }
831     assert(!CurrentLines->empty());
832     auto &LastLine = CurrentLines->back();
833     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
834       return false;
835     if (Tok->is(TT_ElseLBrace))
836       return true;
837     if (WrappedOpeningBrace) {
838       assert(Index > 0);
839       --Index; // The line above the wrapped l_brace.
840       Tok = nullptr;
841     }
842     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
843   };
844   if (RemoveBraces()) {
845     Tok->MatchingParen = FormatTok;
846     FormatTok->MatchingParen = Tok;
847   }
848 
849   size_t PPEndHash = computePPHash();
850 
851   // Munch the closing brace.
852   nextToken(/*LevelDifference=*/-AddLevels);
853 
854   // When this is a function block and there is an unnecessary semicolon
855   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
856   // it later).
857   if (Style.RemoveSemicolon && IsFunctionRBrace) {
858     while (FormatTok->is(tok::semi)) {
859       FormatTok->Optional = true;
860       nextToken();
861     }
862   }
863 
864   HandleVerilogBlockLabel();
865 
866   if (MacroBlock && FormatTok->is(tok::l_paren))
867     parseParens();
868 
869   Line->Level = InitialLevel;
870 
871   if (FormatTok->is(tok::kw_noexcept)) {
872     // A noexcept in a requires expression.
873     nextToken();
874   }
875 
876   if (FormatTok->is(tok::arrow)) {
877     // Following the } or noexcept we can find a trailing return type arrow
878     // as part of an implicit conversion constraint.
879     nextToken();
880     parseStructuralElement();
881   }
882 
883   if (MunchSemi && FormatTok->is(tok::semi))
884     nextToken();
885 
886   if (PPStartHash == PPEndHash) {
887     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
888     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
889       // Update the opening line to add the forward reference as well
890       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
891           CurrentLines->size() - 1;
892     }
893   }
894 
895   return IfLBrace;
896 }
897 
898 static bool isGoogScope(const UnwrappedLine &Line) {
899   // FIXME: Closure-library specific stuff should not be hard-coded but be
900   // configurable.
901   if (Line.Tokens.size() < 4)
902     return false;
903   auto I = Line.Tokens.begin();
904   if (I->Tok->TokenText != "goog")
905     return false;
906   ++I;
907   if (I->Tok->isNot(tok::period))
908     return false;
909   ++I;
910   if (I->Tok->TokenText != "scope")
911     return false;
912   ++I;
913   return I->Tok->is(tok::l_paren);
914 }
915 
916 static bool isIIFE(const UnwrappedLine &Line,
917                    const AdditionalKeywords &Keywords) {
918   // Look for the start of an immediately invoked anonymous function.
919   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
920   // This is commonly done in JavaScript to create a new, anonymous scope.
921   // Example: (function() { ... })()
922   if (Line.Tokens.size() < 3)
923     return false;
924   auto I = Line.Tokens.begin();
925   if (I->Tok->isNot(tok::l_paren))
926     return false;
927   ++I;
928   if (I->Tok->isNot(Keywords.kw_function))
929     return false;
930   ++I;
931   return I->Tok->is(tok::l_paren);
932 }
933 
934 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
935                                    const FormatToken &InitialToken) {
936   tok::TokenKind Kind = InitialToken.Tok.getKind();
937   if (InitialToken.is(TT_NamespaceMacro))
938     Kind = tok::kw_namespace;
939 
940   switch (Kind) {
941   case tok::kw_namespace:
942     return Style.BraceWrapping.AfterNamespace;
943   case tok::kw_class:
944     return Style.BraceWrapping.AfterClass;
945   case tok::kw_union:
946     return Style.BraceWrapping.AfterUnion;
947   case tok::kw_struct:
948     return Style.BraceWrapping.AfterStruct;
949   case tok::kw_enum:
950     return Style.BraceWrapping.AfterEnum;
951   default:
952     return false;
953   }
954 }
955 
956 void UnwrappedLineParser::parseChildBlock(
957     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
958   assert(FormatTok->is(tok::l_brace));
959   FormatTok->setBlockKind(BK_Block);
960   const FormatToken *OpeningBrace = FormatTok;
961   nextToken();
962   {
963     bool SkipIndent = (Style.isJavaScript() &&
964                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
965     ScopedLineState LineState(*this);
966     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
967                                             /*MustBeDeclaration=*/false);
968     Line->Level += SkipIndent ? 0 : 1;
969     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
970     flushComments(isOnNewLine(*FormatTok));
971     Line->Level -= SkipIndent ? 0 : 1;
972   }
973   nextToken();
974 }
975 
976 void UnwrappedLineParser::parsePPDirective() {
977   assert(FormatTok->is(tok::hash) && "'#' expected");
978   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
979 
980   nextToken();
981 
982   if (!FormatTok->Tok.getIdentifierInfo()) {
983     parsePPUnknown();
984     return;
985   }
986 
987   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
988   case tok::pp_define:
989     parsePPDefine();
990     return;
991   case tok::pp_if:
992     parsePPIf(/*IfDef=*/false);
993     break;
994   case tok::pp_ifdef:
995   case tok::pp_ifndef:
996     parsePPIf(/*IfDef=*/true);
997     break;
998   case tok::pp_else:
999   case tok::pp_elifdef:
1000   case tok::pp_elifndef:
1001   case tok::pp_elif:
1002     parsePPElse();
1003     break;
1004   case tok::pp_endif:
1005     parsePPEndIf();
1006     break;
1007   case tok::pp_pragma:
1008     parsePPPragma();
1009     break;
1010   default:
1011     parsePPUnknown();
1012     break;
1013   }
1014 }
1015 
1016 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1017   size_t Line = CurrentLines->size();
1018   if (CurrentLines == &PreprocessorDirectives)
1019     Line += Lines.size();
1020 
1021   if (Unreachable ||
1022       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1023     PPStack.push_back({PP_Unreachable, Line});
1024   } else {
1025     PPStack.push_back({PP_Conditional, Line});
1026   }
1027 }
1028 
1029 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1030   ++PPBranchLevel;
1031   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1032   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1033     PPLevelBranchIndex.push_back(0);
1034     PPLevelBranchCount.push_back(0);
1035   }
1036   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1037   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1038   conditionalCompilationCondition(Unreachable || Skip);
1039 }
1040 
1041 void UnwrappedLineParser::conditionalCompilationAlternative() {
1042   if (!PPStack.empty())
1043     PPStack.pop_back();
1044   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1045   if (!PPChainBranchIndex.empty())
1046     ++PPChainBranchIndex.top();
1047   conditionalCompilationCondition(
1048       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1049       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1050 }
1051 
1052 void UnwrappedLineParser::conditionalCompilationEnd() {
1053   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1054   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1055     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1056       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1057   }
1058   // Guard against #endif's without #if.
1059   if (PPBranchLevel > -1)
1060     --PPBranchLevel;
1061   if (!PPChainBranchIndex.empty())
1062     PPChainBranchIndex.pop();
1063   if (!PPStack.empty())
1064     PPStack.pop_back();
1065 }
1066 
1067 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1068   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1069   nextToken();
1070   bool Unreachable = false;
1071   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1072     Unreachable = true;
1073   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1074     Unreachable = true;
1075   conditionalCompilationStart(Unreachable);
1076   FormatToken *IfCondition = FormatTok;
1077   // If there's a #ifndef on the first line, and the only lines before it are
1078   // comments, it could be an include guard.
1079   bool MaybeIncludeGuard = IfNDef;
1080   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1081     for (auto &Line : Lines) {
1082       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1083         MaybeIncludeGuard = false;
1084         IncludeGuard = IG_Rejected;
1085         break;
1086       }
1087     }
1088   }
1089   --PPBranchLevel;
1090   parsePPUnknown();
1091   ++PPBranchLevel;
1092   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1093     IncludeGuard = IG_IfNdefed;
1094     IncludeGuardToken = IfCondition;
1095   }
1096 }
1097 
1098 void UnwrappedLineParser::parsePPElse() {
1099   // If a potential include guard has an #else, it's not an include guard.
1100   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1101     IncludeGuard = IG_Rejected;
1102   // Don't crash when there is an #else without an #if.
1103   assert(PPBranchLevel >= -1);
1104   if (PPBranchLevel == -1)
1105     conditionalCompilationStart(/*Unreachable=*/true);
1106   conditionalCompilationAlternative();
1107   --PPBranchLevel;
1108   parsePPUnknown();
1109   ++PPBranchLevel;
1110 }
1111 
1112 void UnwrappedLineParser::parsePPEndIf() {
1113   conditionalCompilationEnd();
1114   parsePPUnknown();
1115   // If the #endif of a potential include guard is the last thing in the file,
1116   // then we found an include guard.
1117   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1118       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1119     IncludeGuard = IG_Found;
1120   }
1121 }
1122 
1123 void UnwrappedLineParser::parsePPDefine() {
1124   nextToken();
1125 
1126   if (!FormatTok->Tok.getIdentifierInfo()) {
1127     IncludeGuard = IG_Rejected;
1128     IncludeGuardToken = nullptr;
1129     parsePPUnknown();
1130     return;
1131   }
1132 
1133   if (IncludeGuard == IG_IfNdefed &&
1134       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1135     IncludeGuard = IG_Defined;
1136     IncludeGuardToken = nullptr;
1137     for (auto &Line : Lines) {
1138       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1139         IncludeGuard = IG_Rejected;
1140         break;
1141       }
1142     }
1143   }
1144 
1145   // In the context of a define, even keywords should be treated as normal
1146   // identifiers. Setting the kind to identifier is not enough, because we need
1147   // to treat additional keywords like __except as well, which are already
1148   // identifiers. Setting the identifier info to null interferes with include
1149   // guard processing above, and changes preprocessing nesting.
1150   FormatTok->Tok.setKind(tok::identifier);
1151   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1152   nextToken();
1153   if (FormatTok->Tok.getKind() == tok::l_paren &&
1154       !FormatTok->hasWhitespaceBefore()) {
1155     parseParens();
1156   }
1157   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1158     Line->Level += PPBranchLevel + 1;
1159   addUnwrappedLine();
1160   ++Line->Level;
1161 
1162   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1163   assert((int)Line->PPLevel >= 0);
1164   Line->InMacroBody = true;
1165 
1166   // Errors during a preprocessor directive can only affect the layout of the
1167   // preprocessor directive, and thus we ignore them. An alternative approach
1168   // would be to use the same approach we use on the file level (no
1169   // re-indentation if there was a structural error) within the macro
1170   // definition.
1171   parseFile();
1172 }
1173 
1174 void UnwrappedLineParser::parsePPPragma() {
1175   Line->InPragmaDirective = true;
1176   parsePPUnknown();
1177 }
1178 
1179 void UnwrappedLineParser::parsePPUnknown() {
1180   do {
1181     nextToken();
1182   } while (!eof());
1183   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1184     Line->Level += PPBranchLevel + 1;
1185   addUnwrappedLine();
1186 }
1187 
1188 // Here we exclude certain tokens that are not usually the first token in an
1189 // unwrapped line. This is used in attempt to distinguish macro calls without
1190 // trailing semicolons from other constructs split to several lines.
1191 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1192   // Semicolon can be a null-statement, l_square can be a start of a macro or
1193   // a C++11 attribute, but this doesn't seem to be common.
1194   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1195          Tok.isNot(TT_AttributeSquare) &&
1196          // Tokens that can only be used as binary operators and a part of
1197          // overloaded operator names.
1198          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1199          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1200          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1201          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1202          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1203          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1204          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1205          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1206          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1207          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1208          Tok.isNot(tok::lesslessequal) &&
1209          // Colon is used in labels, base class lists, initializer lists,
1210          // range-based for loops, ternary operator, but should never be the
1211          // first token in an unwrapped line.
1212          Tok.isNot(tok::colon) &&
1213          // 'noexcept' is a trailing annotation.
1214          Tok.isNot(tok::kw_noexcept);
1215 }
1216 
1217 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1218                           const FormatToken *FormatTok) {
1219   // FIXME: This returns true for C/C++ keywords like 'struct'.
1220   return FormatTok->is(tok::identifier) &&
1221          (!FormatTok->Tok.getIdentifierInfo() ||
1222           !FormatTok->isOneOf(
1223               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1224               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1225               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1226               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1227               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1228               Keywords.kw_instanceof, Keywords.kw_interface,
1229               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1230 }
1231 
1232 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1233                                  const FormatToken *FormatTok) {
1234   return FormatTok->Tok.isLiteral() ||
1235          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1236          mustBeJSIdent(Keywords, FormatTok);
1237 }
1238 
1239 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1240 // when encountered after a value (see mustBeJSIdentOrValue).
1241 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1242                            const FormatToken *FormatTok) {
1243   return FormatTok->isOneOf(
1244       tok::kw_return, Keywords.kw_yield,
1245       // conditionals
1246       tok::kw_if, tok::kw_else,
1247       // loops
1248       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1249       // switch/case
1250       tok::kw_switch, tok::kw_case,
1251       // exceptions
1252       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1253       // declaration
1254       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1255       Keywords.kw_async, Keywords.kw_function,
1256       // import/export
1257       Keywords.kw_import, tok::kw_export);
1258 }
1259 
1260 // Checks whether a token is a type in K&R C (aka C78).
1261 static bool isC78Type(const FormatToken &Tok) {
1262   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1263                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1264                      tok::identifier);
1265 }
1266 
1267 // This function checks whether a token starts the first parameter declaration
1268 // in a K&R C (aka C78) function definition, e.g.:
1269 //   int f(a, b)
1270 //   short a, b;
1271 //   {
1272 //      return a + b;
1273 //   }
1274 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1275                                const FormatToken *FuncName) {
1276   assert(Tok);
1277   assert(Next);
1278   assert(FuncName);
1279 
1280   if (FuncName->isNot(tok::identifier))
1281     return false;
1282 
1283   const FormatToken *Prev = FuncName->Previous;
1284   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1285     return false;
1286 
1287   if (!isC78Type(*Tok) &&
1288       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1289     return false;
1290   }
1291 
1292   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1293     return false;
1294 
1295   Tok = Tok->Previous;
1296   if (!Tok || Tok->isNot(tok::r_paren))
1297     return false;
1298 
1299   Tok = Tok->Previous;
1300   if (!Tok || Tok->isNot(tok::identifier))
1301     return false;
1302 
1303   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1304 }
1305 
1306 bool UnwrappedLineParser::parseModuleImport() {
1307   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1308 
1309   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1310       !Token->Tok.getIdentifierInfo() &&
1311       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1312     return false;
1313   }
1314 
1315   nextToken();
1316   while (!eof()) {
1317     if (FormatTok->is(tok::colon)) {
1318       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1319     }
1320     // Handle import <foo/bar.h> as we would an include statement.
1321     else if (FormatTok->is(tok::less)) {
1322       nextToken();
1323       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1324         // Mark tokens up to the trailing line comments as implicit string
1325         // literals.
1326         if (FormatTok->isNot(tok::comment) &&
1327             !FormatTok->TokenText.startswith("//")) {
1328           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1329         }
1330         nextToken();
1331       }
1332     }
1333     if (FormatTok->is(tok::semi)) {
1334       nextToken();
1335       break;
1336     }
1337     nextToken();
1338   }
1339 
1340   addUnwrappedLine();
1341   return true;
1342 }
1343 
1344 // readTokenWithJavaScriptASI reads the next token and terminates the current
1345 // line if JavaScript Automatic Semicolon Insertion must
1346 // happen between the current token and the next token.
1347 //
1348 // This method is conservative - it cannot cover all edge cases of JavaScript,
1349 // but only aims to correctly handle certain well known cases. It *must not*
1350 // return true in speculative cases.
1351 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1352   FormatToken *Previous = FormatTok;
1353   readToken();
1354   FormatToken *Next = FormatTok;
1355 
1356   bool IsOnSameLine =
1357       CommentsBeforeNextToken.empty()
1358           ? Next->NewlinesBefore == 0
1359           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1360   if (IsOnSameLine)
1361     return;
1362 
1363   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1364   bool PreviousStartsTemplateExpr =
1365       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1366   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1367     // If the line contains an '@' sign, the previous token might be an
1368     // annotation, which can precede another identifier/value.
1369     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1370       return LineNode.Tok->is(tok::at);
1371     });
1372     if (HasAt)
1373       return;
1374   }
1375   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1376     return addUnwrappedLine();
1377   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1378   bool NextEndsTemplateExpr =
1379       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1380   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1381       (PreviousMustBeValue ||
1382        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1383                          tok::minusminus))) {
1384     return addUnwrappedLine();
1385   }
1386   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1387       isJSDeclOrStmt(Keywords, Next)) {
1388     return addUnwrappedLine();
1389   }
1390 }
1391 
1392 void UnwrappedLineParser::parseStructuralElement(
1393     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1394     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1395   if (Style.Language == FormatStyle::LK_TableGen &&
1396       FormatTok->is(tok::pp_include)) {
1397     nextToken();
1398     if (FormatTok->is(tok::string_literal))
1399       nextToken();
1400     addUnwrappedLine();
1401     return;
1402   }
1403 
1404   if (Style.isVerilog()) {
1405     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1406       parseForOrWhileLoop(/*HasParens=*/false);
1407       return;
1408     }
1409     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1410       parseForOrWhileLoop();
1411       return;
1412     }
1413     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1414                            Keywords.kw_assume, Keywords.kw_cover)) {
1415       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1416       return;
1417     }
1418 
1419     // Skip things that can exist before keywords like 'if' and 'case'.
1420     while (true) {
1421       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1422                              Keywords.kw_unique0)) {
1423         nextToken();
1424       } else if (FormatTok->is(tok::l_paren) &&
1425                  Tokens->peekNextToken()->is(tok::star)) {
1426         parseParens();
1427       } else {
1428         break;
1429       }
1430     }
1431   }
1432 
1433   // Tokens that only make sense at the beginning of a line.
1434   switch (FormatTok->Tok.getKind()) {
1435   case tok::kw_asm:
1436     nextToken();
1437     if (FormatTok->is(tok::l_brace)) {
1438       FormatTok->setFinalizedType(TT_InlineASMBrace);
1439       nextToken();
1440       while (FormatTok && !eof()) {
1441         if (FormatTok->is(tok::r_brace)) {
1442           FormatTok->setFinalizedType(TT_InlineASMBrace);
1443           nextToken();
1444           addUnwrappedLine();
1445           break;
1446         }
1447         FormatTok->Finalized = true;
1448         nextToken();
1449       }
1450     }
1451     break;
1452   case tok::kw_namespace:
1453     parseNamespace();
1454     return;
1455   case tok::kw_public:
1456   case tok::kw_protected:
1457   case tok::kw_private:
1458     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1459         Style.isCSharp()) {
1460       nextToken();
1461     } else {
1462       parseAccessSpecifier();
1463     }
1464     return;
1465   case tok::kw_if: {
1466     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1467       // field/method declaration.
1468       break;
1469     }
1470     FormatToken *Tok = parseIfThenElse(IfKind);
1471     if (IfLeftBrace)
1472       *IfLeftBrace = Tok;
1473     return;
1474   }
1475   case tok::kw_for:
1476   case tok::kw_while:
1477     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1478       // field/method declaration.
1479       break;
1480     }
1481     parseForOrWhileLoop();
1482     return;
1483   case tok::kw_do:
1484     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1485       // field/method declaration.
1486       break;
1487     }
1488     parseDoWhile();
1489     if (HasDoWhile)
1490       *HasDoWhile = true;
1491     return;
1492   case tok::kw_switch:
1493     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1494       // 'switch: string' field declaration.
1495       break;
1496     }
1497     parseSwitch();
1498     return;
1499   case tok::kw_default:
1500     // In Verilog default along with other labels are handled in the next loop.
1501     if (Style.isVerilog())
1502       break;
1503     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1504       // 'default: string' field declaration.
1505       break;
1506     }
1507     nextToken();
1508     if (FormatTok->is(tok::colon)) {
1509       FormatTok->setFinalizedType(TT_CaseLabelColon);
1510       parseLabel();
1511       return;
1512     }
1513     // e.g. "default void f() {}" in a Java interface.
1514     break;
1515   case tok::kw_case:
1516     // Proto: there are no switch/case statements.
1517     if (Style.isProto()) {
1518       nextToken();
1519       return;
1520     }
1521     if (Style.isVerilog()) {
1522       parseBlock();
1523       addUnwrappedLine();
1524       return;
1525     }
1526     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1527       // 'case: string' field declaration.
1528       nextToken();
1529       break;
1530     }
1531     parseCaseLabel();
1532     return;
1533   case tok::kw_try:
1534   case tok::kw___try:
1535     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1536       // field/method declaration.
1537       break;
1538     }
1539     parseTryCatch();
1540     return;
1541   case tok::kw_extern:
1542     nextToken();
1543     if (Style.isVerilog()) {
1544       // In Verilog and extern module declaration looks like a start of module.
1545       // But there is no body and endmodule. So we handle it separately.
1546       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1547         parseVerilogHierarchyHeader();
1548         return;
1549       }
1550     } else if (FormatTok->is(tok::string_literal)) {
1551       nextToken();
1552       if (FormatTok->is(tok::l_brace)) {
1553         if (Style.BraceWrapping.AfterExternBlock)
1554           addUnwrappedLine();
1555         // Either we indent or for backwards compatibility we follow the
1556         // AfterExternBlock style.
1557         unsigned AddLevels =
1558             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1559                     (Style.BraceWrapping.AfterExternBlock &&
1560                      Style.IndentExternBlock ==
1561                          FormatStyle::IEBS_AfterExternBlock)
1562                 ? 1u
1563                 : 0u;
1564         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1565         addUnwrappedLine();
1566         return;
1567       }
1568     }
1569     break;
1570   case tok::kw_export:
1571     if (Style.isJavaScript()) {
1572       parseJavaScriptEs6ImportExport();
1573       return;
1574     }
1575     if (Style.isCpp()) {
1576       nextToken();
1577       if (FormatTok->is(tok::kw_namespace)) {
1578         parseNamespace();
1579         return;
1580       }
1581       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1582         return;
1583     }
1584     break;
1585   case tok::kw_inline:
1586     nextToken();
1587     if (FormatTok->is(tok::kw_namespace)) {
1588       parseNamespace();
1589       return;
1590     }
1591     break;
1592   case tok::identifier:
1593     if (FormatTok->is(TT_ForEachMacro)) {
1594       parseForOrWhileLoop();
1595       return;
1596     }
1597     if (FormatTok->is(TT_MacroBlockBegin)) {
1598       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1599                  /*MunchSemi=*/false);
1600       return;
1601     }
1602     if (FormatTok->is(Keywords.kw_import)) {
1603       if (Style.isJavaScript()) {
1604         parseJavaScriptEs6ImportExport();
1605         return;
1606       }
1607       if (Style.Language == FormatStyle::LK_Proto) {
1608         nextToken();
1609         if (FormatTok->is(tok::kw_public))
1610           nextToken();
1611         if (!FormatTok->is(tok::string_literal))
1612           return;
1613         nextToken();
1614         if (FormatTok->is(tok::semi))
1615           nextToken();
1616         addUnwrappedLine();
1617         return;
1618       }
1619       if (Style.isCpp() && parseModuleImport())
1620         return;
1621     }
1622     if (Style.isCpp() &&
1623         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1624                            Keywords.kw_slots, Keywords.kw_qslots)) {
1625       nextToken();
1626       if (FormatTok->is(tok::colon)) {
1627         nextToken();
1628         addUnwrappedLine();
1629         return;
1630       }
1631     }
1632     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1633       parseStatementMacro();
1634       return;
1635     }
1636     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1637       parseNamespace();
1638       return;
1639     }
1640     // In all other cases, parse the declaration.
1641     break;
1642   default:
1643     break;
1644   }
1645   do {
1646     const FormatToken *Previous = FormatTok->Previous;
1647     switch (FormatTok->Tok.getKind()) {
1648     case tok::at:
1649       nextToken();
1650       if (FormatTok->is(tok::l_brace)) {
1651         nextToken();
1652         parseBracedList();
1653         break;
1654       } else if (Style.Language == FormatStyle::LK_Java &&
1655                  FormatTok->is(Keywords.kw_interface)) {
1656         nextToken();
1657         break;
1658       }
1659       switch (FormatTok->Tok.getObjCKeywordID()) {
1660       case tok::objc_public:
1661       case tok::objc_protected:
1662       case tok::objc_package:
1663       case tok::objc_private:
1664         return parseAccessSpecifier();
1665       case tok::objc_interface:
1666       case tok::objc_implementation:
1667         return parseObjCInterfaceOrImplementation();
1668       case tok::objc_protocol:
1669         if (parseObjCProtocol())
1670           return;
1671         break;
1672       case tok::objc_end:
1673         return; // Handled by the caller.
1674       case tok::objc_optional:
1675       case tok::objc_required:
1676         nextToken();
1677         addUnwrappedLine();
1678         return;
1679       case tok::objc_autoreleasepool:
1680         nextToken();
1681         if (FormatTok->is(tok::l_brace)) {
1682           if (Style.BraceWrapping.AfterControlStatement ==
1683               FormatStyle::BWACS_Always) {
1684             addUnwrappedLine();
1685           }
1686           parseBlock();
1687         }
1688         addUnwrappedLine();
1689         return;
1690       case tok::objc_synchronized:
1691         nextToken();
1692         if (FormatTok->is(tok::l_paren)) {
1693           // Skip synchronization object
1694           parseParens();
1695         }
1696         if (FormatTok->is(tok::l_brace)) {
1697           if (Style.BraceWrapping.AfterControlStatement ==
1698               FormatStyle::BWACS_Always) {
1699             addUnwrappedLine();
1700           }
1701           parseBlock();
1702         }
1703         addUnwrappedLine();
1704         return;
1705       case tok::objc_try:
1706         // This branch isn't strictly necessary (the kw_try case below would
1707         // do this too after the tok::at is parsed above).  But be explicit.
1708         parseTryCatch();
1709         return;
1710       default:
1711         break;
1712       }
1713       break;
1714     case tok::kw_requires: {
1715       if (Style.isCpp()) {
1716         bool ParsedClause = parseRequires();
1717         if (ParsedClause)
1718           return;
1719       } else {
1720         nextToken();
1721       }
1722       break;
1723     }
1724     case tok::kw_enum:
1725       // Ignore if this is part of "template <enum ...".
1726       if (Previous && Previous->is(tok::less)) {
1727         nextToken();
1728         break;
1729       }
1730 
1731       // parseEnum falls through and does not yet add an unwrapped line as an
1732       // enum definition can start a structural element.
1733       if (!parseEnum())
1734         break;
1735       // This only applies to C++ and Verilog.
1736       if (!Style.isCpp() && !Style.isVerilog()) {
1737         addUnwrappedLine();
1738         return;
1739       }
1740       break;
1741     case tok::kw_typedef:
1742       nextToken();
1743       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1744                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1745                              Keywords.kw_CF_CLOSED_ENUM,
1746                              Keywords.kw_NS_CLOSED_ENUM)) {
1747         parseEnum();
1748       }
1749       break;
1750     case tok::kw_class:
1751       if (Style.isVerilog()) {
1752         parseBlock();
1753         addUnwrappedLine();
1754         return;
1755       }
1756       [[fallthrough]];
1757     case tok::kw_struct:
1758     case tok::kw_union:
1759       if (parseStructLike())
1760         return;
1761       break;
1762     case tok::kw_decltype:
1763       nextToken();
1764       if (FormatTok->is(tok::l_paren)) {
1765         parseParens();
1766         assert(FormatTok->Previous);
1767         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1768                                               tok::l_paren)) {
1769           Line->SeenDecltypeAuto = true;
1770         }
1771       }
1772       break;
1773     case tok::period:
1774       nextToken();
1775       // In Java, classes have an implicit static member "class".
1776       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1777           FormatTok->is(tok::kw_class)) {
1778         nextToken();
1779       }
1780       if (Style.isJavaScript() && FormatTok &&
1781           FormatTok->Tok.getIdentifierInfo()) {
1782         // JavaScript only has pseudo keywords, all keywords are allowed to
1783         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1784         nextToken();
1785       }
1786       break;
1787     case tok::semi:
1788       nextToken();
1789       addUnwrappedLine();
1790       return;
1791     case tok::r_brace:
1792       addUnwrappedLine();
1793       return;
1794     case tok::l_paren: {
1795       parseParens();
1796       // Break the unwrapped line if a K&R C function definition has a parameter
1797       // declaration.
1798       if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1799         break;
1800       if (isC78ParameterDecl(FormatTok,
1801                              Tokens->peekNextToken(/*SkipComment=*/true),
1802                              Previous)) {
1803         addUnwrappedLine();
1804         return;
1805       }
1806       break;
1807     }
1808     case tok::kw_operator:
1809       nextToken();
1810       if (FormatTok->isBinaryOperator())
1811         nextToken();
1812       break;
1813     case tok::caret:
1814       nextToken();
1815       // Block return type.
1816       if (FormatTok->Tok.isAnyIdentifier() ||
1817           FormatTok->isSimpleTypeSpecifier()) {
1818         nextToken();
1819         // Return types: pointers are ok too.
1820         while (FormatTok->is(tok::star))
1821           nextToken();
1822       }
1823       // Block argument list.
1824       if (FormatTok->is(tok::l_paren))
1825         parseParens();
1826       // Block body.
1827       if (FormatTok->is(tok::l_brace))
1828         parseChildBlock();
1829       break;
1830     case tok::l_brace:
1831       if (NextLBracesType != TT_Unknown)
1832         FormatTok->setFinalizedType(NextLBracesType);
1833       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1834         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1835         // A block outside of parentheses must be the last part of a
1836         // structural element.
1837         // FIXME: Figure out cases where this is not true, and add projections
1838         // for them (the one we know is missing are lambdas).
1839         if (Style.Language == FormatStyle::LK_Java &&
1840             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1841           // If necessary, we could set the type to something different than
1842           // TT_FunctionLBrace.
1843           if (Style.BraceWrapping.AfterControlStatement ==
1844               FormatStyle::BWACS_Always) {
1845             addUnwrappedLine();
1846           }
1847         } else if (Style.BraceWrapping.AfterFunction) {
1848           addUnwrappedLine();
1849         }
1850         FormatTok->setFinalizedType(TT_FunctionLBrace);
1851         parseBlock();
1852         IsDecltypeAutoFunction = false;
1853         addUnwrappedLine();
1854         return;
1855       }
1856       // Otherwise this was a braced init list, and the structural
1857       // element continues.
1858       break;
1859     case tok::kw_try:
1860       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1861         // field/method declaration.
1862         nextToken();
1863         break;
1864       }
1865       // We arrive here when parsing function-try blocks.
1866       if (Style.BraceWrapping.AfterFunction)
1867         addUnwrappedLine();
1868       parseTryCatch();
1869       return;
1870     case tok::identifier: {
1871       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1872           Line->MustBeDeclaration) {
1873         addUnwrappedLine();
1874         parseCSharpGenericTypeConstraint();
1875         break;
1876       }
1877       if (FormatTok->is(TT_MacroBlockEnd)) {
1878         addUnwrappedLine();
1879         return;
1880       }
1881 
1882       // Function declarations (as opposed to function expressions) are parsed
1883       // on their own unwrapped line by continuing this loop. Function
1884       // expressions (functions that are not on their own line) must not create
1885       // a new unwrapped line, so they are special cased below.
1886       size_t TokenCount = Line->Tokens.size();
1887       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1888           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1889                                                      Keywords.kw_async)))) {
1890         tryToParseJSFunction();
1891         break;
1892       }
1893       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1894           FormatTok->is(Keywords.kw_interface)) {
1895         if (Style.isJavaScript()) {
1896           // In JavaScript/TypeScript, "interface" can be used as a standalone
1897           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1899           // interface declaration.
1900           unsigned StoredPosition = Tokens->getPosition();
1901           FormatToken *Next = Tokens->getNextToken();
1902           FormatTok = Tokens->setPosition(StoredPosition);
1903           if (!mustBeJSIdent(Keywords, Next)) {
1904             nextToken();
1905             break;
1906           }
1907         }
1908         parseRecord();
1909         addUnwrappedLine();
1910         return;
1911       }
1912 
1913       if (Style.isVerilog()) {
1914         if (FormatTok->is(Keywords.kw_table)) {
1915           parseVerilogTable();
1916           return;
1917         }
1918         if (Keywords.isVerilogBegin(*FormatTok) ||
1919             Keywords.isVerilogHierarchy(*FormatTok)) {
1920           parseBlock();
1921           addUnwrappedLine();
1922           return;
1923         }
1924       }
1925 
1926       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1927         if (parseStructLike())
1928           return;
1929         break;
1930       }
1931 
1932       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1933         parseStatementMacro();
1934         return;
1935       }
1936 
1937       // See if the following token should start a new unwrapped line.
1938       StringRef Text = FormatTok->TokenText;
1939 
1940       FormatToken *PreviousToken = FormatTok;
1941       nextToken();
1942 
1943       // JS doesn't have macros, and within classes colons indicate fields, not
1944       // labels.
1945       if (Style.isJavaScript())
1946         break;
1947 
1948       auto OneTokenSoFar = [&]() {
1949         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1950         while (I != E && I->Tok->is(tok::comment))
1951           ++I;
1952         while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1953           ++I;
1954         return I != E && (++I == E);
1955       };
1956       if (OneTokenSoFar()) {
1957         // In Verilog labels can be any expression, so we don't do them here.
1958         if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
1959             !Line->MustBeDeclaration) {
1960           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1961           FormatTok->setFinalizedType(TT_GotoLabelColon);
1962           parseLabel(!Style.IndentGotoLabels);
1963           if (HasLabel)
1964             *HasLabel = true;
1965           return;
1966         }
1967         // Recognize function-like macro usages without trailing semicolon as
1968         // well as free-standing macros like Q_OBJECT.
1969         bool FunctionLike = FormatTok->is(tok::l_paren);
1970         if (FunctionLike)
1971           parseParens();
1972 
1973         bool FollowedByNewline =
1974             CommentsBeforeNextToken.empty()
1975                 ? FormatTok->NewlinesBefore > 0
1976                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1977 
1978         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1979             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1980           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1981             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1982           addUnwrappedLine();
1983           return;
1984         }
1985       }
1986       break;
1987     }
1988     case tok::equal:
1989       if ((Style.isJavaScript() || Style.isCSharp()) &&
1990           FormatTok->is(TT_FatArrow)) {
1991         tryToParseChildBlock();
1992         break;
1993       }
1994 
1995       nextToken();
1996       if (FormatTok->is(tok::l_brace)) {
1997         // Block kind should probably be set to BK_BracedInit for any language.
1998         // C# needs this change to ensure that array initialisers and object
1999         // initialisers are indented the same way.
2000         if (Style.isCSharp())
2001           FormatTok->setBlockKind(BK_BracedInit);
2002         nextToken();
2003         parseBracedList();
2004       } else if (Style.Language == FormatStyle::LK_Proto &&
2005                  FormatTok->is(tok::less)) {
2006         nextToken();
2007         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2008                         /*ClosingBraceKind=*/tok::greater);
2009       }
2010       break;
2011     case tok::l_square:
2012       parseSquare();
2013       break;
2014     case tok::kw_new:
2015       parseNew();
2016       break;
2017     case tok::kw_case:
2018       // Proto: there are no switch/case statements.
2019       if (Style.isProto()) {
2020         nextToken();
2021         return;
2022       }
2023       // In Verilog switch is called case.
2024       if (Style.isVerilog()) {
2025         parseBlock();
2026         addUnwrappedLine();
2027         return;
2028       }
2029       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2030         // 'case: string' field declaration.
2031         nextToken();
2032         break;
2033       }
2034       parseCaseLabel();
2035       break;
2036     case tok::kw_default:
2037       nextToken();
2038       if (Style.isVerilog()) {
2039         if (FormatTok->is(tok::colon)) {
2040           // The label will be handled in the next iteration.
2041           break;
2042         }
2043         if (FormatTok->is(Keywords.kw_clocking)) {
2044           // A default clocking block.
2045           parseBlock();
2046           addUnwrappedLine();
2047           return;
2048         }
2049         parseVerilogCaseLabel();
2050         return;
2051       }
2052       break;
2053     case tok::colon:
2054       nextToken();
2055       if (Style.isVerilog()) {
2056         parseVerilogCaseLabel();
2057         return;
2058       }
2059       break;
2060     default:
2061       nextToken();
2062       break;
2063     }
2064   } while (!eof());
2065 }
2066 
2067 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2068   assert(FormatTok->is(tok::l_brace));
2069   if (!Style.isCSharp())
2070     return false;
2071   // See if it's a property accessor.
2072   if (FormatTok->Previous->isNot(tok::identifier))
2073     return false;
2074 
2075   // See if we are inside a property accessor.
2076   //
2077   // Record the current tokenPosition so that we can advance and
2078   // reset the current token. `Next` is not set yet so we need
2079   // another way to advance along the token stream.
2080   unsigned int StoredPosition = Tokens->getPosition();
2081   FormatToken *Tok = Tokens->getNextToken();
2082 
2083   // A trivial property accessor is of the form:
2084   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2085   // Track these as they do not require line breaks to be introduced.
2086   bool HasSpecialAccessor = false;
2087   bool IsTrivialPropertyAccessor = true;
2088   while (!eof()) {
2089     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2090                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2091                      Keywords.kw_init, Keywords.kw_set)) {
2092       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2093         HasSpecialAccessor = true;
2094       Tok = Tokens->getNextToken();
2095       continue;
2096     }
2097     if (Tok->isNot(tok::r_brace))
2098       IsTrivialPropertyAccessor = false;
2099     break;
2100   }
2101 
2102   if (!HasSpecialAccessor) {
2103     Tokens->setPosition(StoredPosition);
2104     return false;
2105   }
2106 
2107   // Try to parse the property accessor:
2108   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2109   Tokens->setPosition(StoredPosition);
2110   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2111     addUnwrappedLine();
2112   nextToken();
2113   do {
2114     switch (FormatTok->Tok.getKind()) {
2115     case tok::r_brace:
2116       nextToken();
2117       if (FormatTok->is(tok::equal)) {
2118         while (!eof() && FormatTok->isNot(tok::semi))
2119           nextToken();
2120         nextToken();
2121       }
2122       addUnwrappedLine();
2123       return true;
2124     case tok::l_brace:
2125       ++Line->Level;
2126       parseBlock(/*MustBeDeclaration=*/true);
2127       addUnwrappedLine();
2128       --Line->Level;
2129       break;
2130     case tok::equal:
2131       if (FormatTok->is(TT_FatArrow)) {
2132         ++Line->Level;
2133         do {
2134           nextToken();
2135         } while (!eof() && FormatTok->isNot(tok::semi));
2136         nextToken();
2137         addUnwrappedLine();
2138         --Line->Level;
2139         break;
2140       }
2141       nextToken();
2142       break;
2143     default:
2144       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2145                              Keywords.kw_set) &&
2146           !IsTrivialPropertyAccessor) {
2147         // Non-trivial get/set needs to be on its own line.
2148         addUnwrappedLine();
2149       }
2150       nextToken();
2151     }
2152   } while (!eof());
2153 
2154   // Unreachable for well-formed code (paired '{' and '}').
2155   return true;
2156 }
2157 
2158 bool UnwrappedLineParser::tryToParseLambda() {
2159   assert(FormatTok->is(tok::l_square));
2160   if (!Style.isCpp()) {
2161     nextToken();
2162     return false;
2163   }
2164   FormatToken &LSquare = *FormatTok;
2165   if (!tryToParseLambdaIntroducer())
2166     return false;
2167 
2168   bool SeenArrow = false;
2169   bool InTemplateParameterList = false;
2170 
2171   while (FormatTok->isNot(tok::l_brace)) {
2172     if (FormatTok->isSimpleTypeSpecifier()) {
2173       nextToken();
2174       continue;
2175     }
2176     switch (FormatTok->Tok.getKind()) {
2177     case tok::l_brace:
2178       break;
2179     case tok::l_paren:
2180       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2181       break;
2182     case tok::l_square:
2183       parseSquare();
2184       break;
2185     case tok::less:
2186       assert(FormatTok->Previous);
2187       if (FormatTok->Previous->is(tok::r_square))
2188         InTemplateParameterList = true;
2189       nextToken();
2190       break;
2191     case tok::kw_auto:
2192     case tok::kw_class:
2193     case tok::kw_template:
2194     case tok::kw_typename:
2195     case tok::amp:
2196     case tok::star:
2197     case tok::kw_const:
2198     case tok::kw_constexpr:
2199     case tok::kw_consteval:
2200     case tok::comma:
2201     case tok::greater:
2202     case tok::identifier:
2203     case tok::numeric_constant:
2204     case tok::coloncolon:
2205     case tok::kw_mutable:
2206     case tok::kw_noexcept:
2207     case tok::kw_static:
2208       nextToken();
2209       break;
2210     // Specialization of a template with an integer parameter can contain
2211     // arithmetic, logical, comparison and ternary operators.
2212     //
2213     // FIXME: This also accepts sequences of operators that are not in the scope
2214     // of a template argument list.
2215     //
2216     // In a C++ lambda a template type can only occur after an arrow. We use
2217     // this as an heuristic to distinguish between Objective-C expressions
2218     // followed by an `a->b` expression, such as:
2219     // ([obj func:arg] + a->b)
2220     // Otherwise the code below would parse as a lambda.
2221     //
2222     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2223     // explicit template lists: []<bool b = true && false>(U &&u){}
2224     case tok::plus:
2225     case tok::minus:
2226     case tok::exclaim:
2227     case tok::tilde:
2228     case tok::slash:
2229     case tok::percent:
2230     case tok::lessless:
2231     case tok::pipe:
2232     case tok::pipepipe:
2233     case tok::ampamp:
2234     case tok::caret:
2235     case tok::equalequal:
2236     case tok::exclaimequal:
2237     case tok::greaterequal:
2238     case tok::lessequal:
2239     case tok::question:
2240     case tok::colon:
2241     case tok::ellipsis:
2242     case tok::kw_true:
2243     case tok::kw_false:
2244       if (SeenArrow || InTemplateParameterList) {
2245         nextToken();
2246         break;
2247       }
2248       return true;
2249     case tok::arrow:
2250       // This might or might not actually be a lambda arrow (this could be an
2251       // ObjC method invocation followed by a dereferencing arrow). We might
2252       // reset this back to TT_Unknown in TokenAnnotator.
2253       FormatTok->setFinalizedType(TT_LambdaArrow);
2254       SeenArrow = true;
2255       nextToken();
2256       break;
2257     case tok::kw_requires: {
2258       auto *RequiresToken = FormatTok;
2259       nextToken();
2260       parseRequiresClause(RequiresToken);
2261       break;
2262     }
2263     default:
2264       return true;
2265     }
2266   }
2267 
2268   FormatTok->setFinalizedType(TT_LambdaLBrace);
2269   LSquare.setFinalizedType(TT_LambdaLSquare);
2270 
2271   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2272   parseChildBlock();
2273   assert(!NestedLambdas.empty());
2274   NestedLambdas.pop_back();
2275 
2276   return true;
2277 }
2278 
2279 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2280   const FormatToken *Previous = FormatTok->Previous;
2281   const FormatToken *LeftSquare = FormatTok;
2282   nextToken();
2283   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2284                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2285                                         tok::kw_co_yield, tok::kw_co_return)) ||
2286                     Previous->closesScope())) ||
2287       LeftSquare->isCppStructuredBinding(Style)) {
2288     return false;
2289   }
2290   if (FormatTok->is(tok::l_square))
2291     return false;
2292   if (FormatTok->is(tok::r_square)) {
2293     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2294     if (Next->is(tok::greater))
2295       return false;
2296   }
2297   parseSquare(/*LambdaIntroducer=*/true);
2298   return true;
2299 }
2300 
2301 void UnwrappedLineParser::tryToParseJSFunction() {
2302   assert(FormatTok->is(Keywords.kw_function) ||
2303          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2304   if (FormatTok->is(Keywords.kw_async))
2305     nextToken();
2306   // Consume "function".
2307   nextToken();
2308 
2309   // Consume * (generator function). Treat it like C++'s overloaded operators.
2310   if (FormatTok->is(tok::star)) {
2311     FormatTok->setFinalizedType(TT_OverloadedOperator);
2312     nextToken();
2313   }
2314 
2315   // Consume function name.
2316   if (FormatTok->is(tok::identifier))
2317     nextToken();
2318 
2319   if (FormatTok->isNot(tok::l_paren))
2320     return;
2321 
2322   // Parse formal parameter list.
2323   parseParens();
2324 
2325   if (FormatTok->is(tok::colon)) {
2326     // Parse a type definition.
2327     nextToken();
2328 
2329     // Eat the type declaration. For braced inline object types, balance braces,
2330     // otherwise just parse until finding an l_brace for the function body.
2331     if (FormatTok->is(tok::l_brace))
2332       tryToParseBracedList();
2333     else
2334       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2335         nextToken();
2336   }
2337 
2338   if (FormatTok->is(tok::semi))
2339     return;
2340 
2341   parseChildBlock();
2342 }
2343 
2344 bool UnwrappedLineParser::tryToParseBracedList() {
2345   if (FormatTok->is(BK_Unknown))
2346     calculateBraceTypes();
2347   assert(FormatTok->isNot(BK_Unknown));
2348   if (FormatTok->is(BK_Block))
2349     return false;
2350   nextToken();
2351   parseBracedList();
2352   return true;
2353 }
2354 
2355 bool UnwrappedLineParser::tryToParseChildBlock() {
2356   assert(Style.isJavaScript() || Style.isCSharp());
2357   assert(FormatTok->is(TT_FatArrow));
2358   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2359   // They always start an expression or a child block if followed by a curly
2360   // brace.
2361   nextToken();
2362   if (FormatTok->isNot(tok::l_brace))
2363     return false;
2364   parseChildBlock();
2365   return true;
2366 }
2367 
2368 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2369                                           bool IsEnum,
2370                                           tok::TokenKind ClosingBraceKind) {
2371   bool HasError = false;
2372 
2373   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2374   // replace this by using parseAssignmentExpression() inside.
2375   do {
2376     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2377         tryToParseChildBlock()) {
2378       continue;
2379     }
2380     if (Style.isJavaScript()) {
2381       if (FormatTok->is(Keywords.kw_function) ||
2382           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2383         tryToParseJSFunction();
2384         continue;
2385       }
2386       if (FormatTok->is(tok::l_brace)) {
2387         // Could be a method inside of a braced list `{a() { return 1; }}`.
2388         if (tryToParseBracedList())
2389           continue;
2390         parseChildBlock();
2391       }
2392     }
2393     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2394       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2395         addUnwrappedLine();
2396       nextToken();
2397       return !HasError;
2398     }
2399     switch (FormatTok->Tok.getKind()) {
2400     case tok::l_square:
2401       if (Style.isCSharp())
2402         parseSquare();
2403       else
2404         tryToParseLambda();
2405       break;
2406     case tok::l_paren:
2407       parseParens();
2408       // JavaScript can just have free standing methods and getters/setters in
2409       // object literals. Detect them by a "{" following ")".
2410       if (Style.isJavaScript()) {
2411         if (FormatTok->is(tok::l_brace))
2412           parseChildBlock();
2413         break;
2414       }
2415       break;
2416     case tok::l_brace:
2417       // Assume there are no blocks inside a braced init list apart
2418       // from the ones we explicitly parse out (like lambdas).
2419       FormatTok->setBlockKind(BK_BracedInit);
2420       nextToken();
2421       parseBracedList();
2422       break;
2423     case tok::less:
2424       if (Style.Language == FormatStyle::LK_Proto ||
2425           ClosingBraceKind == tok::greater) {
2426         nextToken();
2427         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2428                         /*ClosingBraceKind=*/tok::greater);
2429       } else {
2430         nextToken();
2431       }
2432       break;
2433     case tok::semi:
2434       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2435       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2436       // used for error recovery if we have otherwise determined that this is
2437       // a braced list.
2438       if (Style.isJavaScript()) {
2439         nextToken();
2440         break;
2441       }
2442       HasError = true;
2443       if (!ContinueOnSemicolons)
2444         return !HasError;
2445       nextToken();
2446       break;
2447     case tok::comma:
2448       nextToken();
2449       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2450         addUnwrappedLine();
2451       break;
2452     default:
2453       nextToken();
2454       break;
2455     }
2456   } while (!eof());
2457   return false;
2458 }
2459 
2460 /// \brief Parses a pair of parentheses (and everything between them).
2461 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2462 /// double ampersands. This applies for all nested scopes as well.
2463 ///
2464 /// Returns whether there is a `=` token between the parentheses.
2465 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2466   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2467   auto *LeftParen = FormatTok;
2468   bool SeenEqual = false;
2469   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2470   nextToken();
2471   do {
2472     switch (FormatTok->Tok.getKind()) {
2473     case tok::l_paren:
2474       if (parseParens(AmpAmpTokenType))
2475         SeenEqual = true;
2476       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2477         parseChildBlock();
2478       break;
2479     case tok::r_paren:
2480       if (!MightBeStmtExpr &&
2481           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2482         const auto *Prev = LeftParen->Previous;
2483         const auto *Next = Tokens->peekNextToken();
2484         const bool DoubleParens =
2485             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2486         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2487         const bool Blacklisted =
2488             PrevPrev &&
2489             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2490              (SeenEqual &&
2491               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2492                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2493         const bool ReturnParens =
2494             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2495             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2496              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2497             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2498             Next->is(tok::semi);
2499         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2500           LeftParen->Optional = true;
2501           FormatTok->Optional = true;
2502         }
2503       }
2504       nextToken();
2505       return SeenEqual;
2506     case tok::r_brace:
2507       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2508       return SeenEqual;
2509     case tok::l_square:
2510       tryToParseLambda();
2511       break;
2512     case tok::l_brace:
2513       if (!tryToParseBracedList())
2514         parseChildBlock();
2515       break;
2516     case tok::at:
2517       nextToken();
2518       if (FormatTok->is(tok::l_brace)) {
2519         nextToken();
2520         parseBracedList();
2521       }
2522       break;
2523     case tok::equal:
2524       SeenEqual = true;
2525       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2526         tryToParseChildBlock();
2527       else
2528         nextToken();
2529       break;
2530     case tok::kw_class:
2531       if (Style.isJavaScript())
2532         parseRecord(/*ParseAsExpr=*/true);
2533       else
2534         nextToken();
2535       break;
2536     case tok::identifier:
2537       if (Style.isJavaScript() &&
2538           (FormatTok->is(Keywords.kw_function) ||
2539            FormatTok->startsSequence(Keywords.kw_async,
2540                                      Keywords.kw_function))) {
2541         tryToParseJSFunction();
2542       } else {
2543         nextToken();
2544       }
2545       break;
2546     case tok::kw_requires: {
2547       auto RequiresToken = FormatTok;
2548       nextToken();
2549       parseRequiresExpression(RequiresToken);
2550       break;
2551     }
2552     case tok::ampamp:
2553       if (AmpAmpTokenType != TT_Unknown)
2554         FormatTok->setFinalizedType(AmpAmpTokenType);
2555       [[fallthrough]];
2556     default:
2557       nextToken();
2558       break;
2559     }
2560   } while (!eof());
2561   return SeenEqual;
2562 }
2563 
2564 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2565   if (!LambdaIntroducer) {
2566     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2567     if (tryToParseLambda())
2568       return;
2569   }
2570   do {
2571     switch (FormatTok->Tok.getKind()) {
2572     case tok::l_paren:
2573       parseParens();
2574       break;
2575     case tok::r_square:
2576       nextToken();
2577       return;
2578     case tok::r_brace:
2579       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2580       return;
2581     case tok::l_square:
2582       parseSquare();
2583       break;
2584     case tok::l_brace: {
2585       if (!tryToParseBracedList())
2586         parseChildBlock();
2587       break;
2588     }
2589     case tok::at:
2590       nextToken();
2591       if (FormatTok->is(tok::l_brace)) {
2592         nextToken();
2593         parseBracedList();
2594       }
2595       break;
2596     default:
2597       nextToken();
2598       break;
2599     }
2600   } while (!eof());
2601 }
2602 
2603 void UnwrappedLineParser::keepAncestorBraces() {
2604   if (!Style.RemoveBracesLLVM)
2605     return;
2606 
2607   const int MaxNestingLevels = 2;
2608   const int Size = NestedTooDeep.size();
2609   if (Size >= MaxNestingLevels)
2610     NestedTooDeep[Size - MaxNestingLevels] = true;
2611   NestedTooDeep.push_back(false);
2612 }
2613 
2614 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2615   for (const auto &Token : llvm::reverse(Line.Tokens))
2616     if (Token.Tok->isNot(tok::comment))
2617       return Token.Tok;
2618 
2619   return nullptr;
2620 }
2621 
2622 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2623   FormatToken *Tok = nullptr;
2624 
2625   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2626       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2627     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2628               ? getLastNonComment(*Line)
2629               : Line->Tokens.back().Tok;
2630     assert(Tok);
2631     if (Tok->BraceCount < 0) {
2632       assert(Tok->BraceCount == -1);
2633       Tok = nullptr;
2634     } else {
2635       Tok->BraceCount = -1;
2636     }
2637   }
2638 
2639   addUnwrappedLine();
2640   ++Line->Level;
2641   parseStructuralElement();
2642 
2643   if (Tok) {
2644     assert(!Line->InPPDirective);
2645     Tok = nullptr;
2646     for (const auto &L : llvm::reverse(*CurrentLines)) {
2647       if (!L.InPPDirective && getLastNonComment(L)) {
2648         Tok = L.Tokens.back().Tok;
2649         break;
2650       }
2651     }
2652     assert(Tok);
2653     ++Tok->BraceCount;
2654   }
2655 
2656   if (CheckEOF && eof())
2657     addUnwrappedLine();
2658 
2659   --Line->Level;
2660 }
2661 
2662 static void markOptionalBraces(FormatToken *LeftBrace) {
2663   if (!LeftBrace)
2664     return;
2665 
2666   assert(LeftBrace->is(tok::l_brace));
2667 
2668   FormatToken *RightBrace = LeftBrace->MatchingParen;
2669   if (!RightBrace) {
2670     assert(!LeftBrace->Optional);
2671     return;
2672   }
2673 
2674   assert(RightBrace->is(tok::r_brace));
2675   assert(RightBrace->MatchingParen == LeftBrace);
2676   assert(LeftBrace->Optional == RightBrace->Optional);
2677 
2678   LeftBrace->Optional = true;
2679   RightBrace->Optional = true;
2680 }
2681 
2682 void UnwrappedLineParser::handleAttributes() {
2683   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2684   if (FormatTok->is(TT_AttributeMacro))
2685     nextToken();
2686   if (FormatTok->is(tok::l_square))
2687     handleCppAttributes();
2688 }
2689 
2690 bool UnwrappedLineParser::handleCppAttributes() {
2691   // Handle [[likely]] / [[unlikely]] attributes.
2692   assert(FormatTok->is(tok::l_square));
2693   if (!tryToParseSimpleAttribute())
2694     return false;
2695   parseSquare();
2696   return true;
2697 }
2698 
2699 /// Returns whether \c Tok begins a block.
2700 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2701   // FIXME: rename the function or make
2702   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2703   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2704                            : Tok.is(tok::l_brace);
2705 }
2706 
2707 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2708                                                   bool KeepBraces,
2709                                                   bool IsVerilogAssert) {
2710   assert((FormatTok->is(tok::kw_if) ||
2711           (Style.isVerilog() &&
2712            FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2713                               Keywords.kw_assume, Keywords.kw_cover))) &&
2714          "'if' expected");
2715   nextToken();
2716 
2717   if (IsVerilogAssert) {
2718     // Handle `assert #0` and `assert final`.
2719     if (FormatTok->is(Keywords.kw_verilogHash)) {
2720       nextToken();
2721       if (FormatTok->is(tok::numeric_constant))
2722         nextToken();
2723     } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2724                                   Keywords.kw_sequence)) {
2725       nextToken();
2726     }
2727   }
2728 
2729   // Handle `if !consteval`.
2730   if (FormatTok->is(tok::exclaim))
2731     nextToken();
2732 
2733   bool KeepIfBraces = true;
2734   if (FormatTok->is(tok::kw_consteval)) {
2735     nextToken();
2736   } else {
2737     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2738     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2739       nextToken();
2740     if (FormatTok->is(tok::l_paren)) {
2741       FormatTok->setFinalizedType(TT_ConditionLParen);
2742       parseParens();
2743     }
2744   }
2745   handleAttributes();
2746   // The then action is optional in Verilog assert statements.
2747   if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2748     nextToken();
2749     addUnwrappedLine();
2750     return nullptr;
2751   }
2752 
2753   bool NeedsUnwrappedLine = false;
2754   keepAncestorBraces();
2755 
2756   FormatToken *IfLeftBrace = nullptr;
2757   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2758 
2759   if (isBlockBegin(*FormatTok)) {
2760     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2761     IfLeftBrace = FormatTok;
2762     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2763     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2764                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2765     if (Style.BraceWrapping.BeforeElse)
2766       addUnwrappedLine();
2767     else
2768       NeedsUnwrappedLine = true;
2769   } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2770     addUnwrappedLine();
2771   } else {
2772     parseUnbracedBody();
2773   }
2774 
2775   if (Style.RemoveBracesLLVM) {
2776     assert(!NestedTooDeep.empty());
2777     KeepIfBraces = KeepIfBraces ||
2778                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2779                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2780                    IfBlockKind == IfStmtKind::IfElseIf;
2781   }
2782 
2783   bool KeepElseBraces = KeepIfBraces;
2784   FormatToken *ElseLeftBrace = nullptr;
2785   IfStmtKind Kind = IfStmtKind::IfOnly;
2786 
2787   if (FormatTok->is(tok::kw_else)) {
2788     if (Style.RemoveBracesLLVM) {
2789       NestedTooDeep.back() = false;
2790       Kind = IfStmtKind::IfElse;
2791     }
2792     nextToken();
2793     handleAttributes();
2794     if (isBlockBegin(*FormatTok)) {
2795       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2796       FormatTok->setFinalizedType(TT_ElseLBrace);
2797       ElseLeftBrace = FormatTok;
2798       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2799       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2800       FormatToken *IfLBrace =
2801           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2802                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2803       if (FormatTok->is(tok::kw_else)) {
2804         KeepElseBraces = KeepElseBraces ||
2805                          ElseBlockKind == IfStmtKind::IfOnly ||
2806                          ElseBlockKind == IfStmtKind::IfElseIf;
2807       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2808         KeepElseBraces = true;
2809         assert(ElseLeftBrace->MatchingParen);
2810         markOptionalBraces(ElseLeftBrace);
2811       }
2812       addUnwrappedLine();
2813     } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2814       const FormatToken *Previous = Tokens->getPreviousToken();
2815       assert(Previous);
2816       const bool IsPrecededByComment = Previous->is(tok::comment);
2817       if (IsPrecededByComment) {
2818         addUnwrappedLine();
2819         ++Line->Level;
2820       }
2821       bool TooDeep = true;
2822       if (Style.RemoveBracesLLVM) {
2823         Kind = IfStmtKind::IfElseIf;
2824         TooDeep = NestedTooDeep.pop_back_val();
2825       }
2826       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2827       if (Style.RemoveBracesLLVM)
2828         NestedTooDeep.push_back(TooDeep);
2829       if (IsPrecededByComment)
2830         --Line->Level;
2831     } else {
2832       parseUnbracedBody(/*CheckEOF=*/true);
2833     }
2834   } else {
2835     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2836     if (NeedsUnwrappedLine)
2837       addUnwrappedLine();
2838   }
2839 
2840   if (!Style.RemoveBracesLLVM)
2841     return nullptr;
2842 
2843   assert(!NestedTooDeep.empty());
2844   KeepElseBraces = KeepElseBraces ||
2845                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2846                    NestedTooDeep.back();
2847 
2848   NestedTooDeep.pop_back();
2849 
2850   if (!KeepIfBraces && !KeepElseBraces) {
2851     markOptionalBraces(IfLeftBrace);
2852     markOptionalBraces(ElseLeftBrace);
2853   } else if (IfLeftBrace) {
2854     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2855     if (IfRightBrace) {
2856       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2857       assert(!IfLeftBrace->Optional);
2858       assert(!IfRightBrace->Optional);
2859       IfLeftBrace->MatchingParen = nullptr;
2860       IfRightBrace->MatchingParen = nullptr;
2861     }
2862   }
2863 
2864   if (IfKind)
2865     *IfKind = Kind;
2866 
2867   return IfLeftBrace;
2868 }
2869 
2870 void UnwrappedLineParser::parseTryCatch() {
2871   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2872   nextToken();
2873   bool NeedsUnwrappedLine = false;
2874   if (FormatTok->is(tok::colon)) {
2875     // We are in a function try block, what comes is an initializer list.
2876     nextToken();
2877 
2878     // In case identifiers were removed by clang-tidy, what might follow is
2879     // multiple commas in sequence - before the first identifier.
2880     while (FormatTok->is(tok::comma))
2881       nextToken();
2882 
2883     while (FormatTok->is(tok::identifier)) {
2884       nextToken();
2885       if (FormatTok->is(tok::l_paren))
2886         parseParens();
2887       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2888           FormatTok->is(tok::l_brace)) {
2889         do {
2890           nextToken();
2891         } while (!FormatTok->is(tok::r_brace));
2892         nextToken();
2893       }
2894 
2895       // In case identifiers were removed by clang-tidy, what might follow is
2896       // multiple commas in sequence - after the first identifier.
2897       while (FormatTok->is(tok::comma))
2898         nextToken();
2899     }
2900   }
2901   // Parse try with resource.
2902   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2903     parseParens();
2904 
2905   keepAncestorBraces();
2906 
2907   if (FormatTok->is(tok::l_brace)) {
2908     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2909     parseBlock();
2910     if (Style.BraceWrapping.BeforeCatch)
2911       addUnwrappedLine();
2912     else
2913       NeedsUnwrappedLine = true;
2914   } else if (!FormatTok->is(tok::kw_catch)) {
2915     // The C++ standard requires a compound-statement after a try.
2916     // If there's none, we try to assume there's a structuralElement
2917     // and try to continue.
2918     addUnwrappedLine();
2919     ++Line->Level;
2920     parseStructuralElement();
2921     --Line->Level;
2922   }
2923   while (true) {
2924     if (FormatTok->is(tok::at))
2925       nextToken();
2926     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2927                              tok::kw___finally) ||
2928           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2929            FormatTok->is(Keywords.kw_finally)) ||
2930           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2931            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2932       break;
2933     }
2934     nextToken();
2935     while (FormatTok->isNot(tok::l_brace)) {
2936       if (FormatTok->is(tok::l_paren)) {
2937         parseParens();
2938         continue;
2939       }
2940       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2941         if (Style.RemoveBracesLLVM)
2942           NestedTooDeep.pop_back();
2943         return;
2944       }
2945       nextToken();
2946     }
2947     NeedsUnwrappedLine = false;
2948     Line->MustBeDeclaration = false;
2949     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2950     parseBlock();
2951     if (Style.BraceWrapping.BeforeCatch)
2952       addUnwrappedLine();
2953     else
2954       NeedsUnwrappedLine = true;
2955   }
2956 
2957   if (Style.RemoveBracesLLVM)
2958     NestedTooDeep.pop_back();
2959 
2960   if (NeedsUnwrappedLine)
2961     addUnwrappedLine();
2962 }
2963 
2964 void UnwrappedLineParser::parseNamespace() {
2965   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2966          "'namespace' expected");
2967 
2968   const FormatToken &InitialToken = *FormatTok;
2969   nextToken();
2970   if (InitialToken.is(TT_NamespaceMacro)) {
2971     parseParens();
2972   } else {
2973     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2974                               tok::l_square, tok::period, tok::l_paren) ||
2975            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2976       if (FormatTok->is(tok::l_square))
2977         parseSquare();
2978       else if (FormatTok->is(tok::l_paren))
2979         parseParens();
2980       else
2981         nextToken();
2982     }
2983   }
2984   if (FormatTok->is(tok::l_brace)) {
2985     if (ShouldBreakBeforeBrace(Style, InitialToken))
2986       addUnwrappedLine();
2987 
2988     unsigned AddLevels =
2989         Style.NamespaceIndentation == FormatStyle::NI_All ||
2990                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2991                  DeclarationScopeStack.size() > 1)
2992             ? 1u
2993             : 0u;
2994     bool ManageWhitesmithsBraces =
2995         AddLevels == 0u &&
2996         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2997 
2998     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2999     // the whole block.
3000     if (ManageWhitesmithsBraces)
3001       ++Line->Level;
3002 
3003     // Munch the semicolon after a namespace. This is more common than one would
3004     // think. Putting the semicolon into its own line is very ugly.
3005     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3006                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3007                ManageWhitesmithsBraces);
3008 
3009     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3010 
3011     if (ManageWhitesmithsBraces)
3012       --Line->Level;
3013   }
3014   // FIXME: Add error handling.
3015 }
3016 
3017 void UnwrappedLineParser::parseNew() {
3018   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3019   nextToken();
3020 
3021   if (Style.isCSharp()) {
3022     do {
3023       // Handle constructor invocation, e.g. `new(field: value)`.
3024       if (FormatTok->is(tok::l_paren))
3025         parseParens();
3026 
3027       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3028       if (FormatTok->is(tok::l_brace))
3029         parseBracedList();
3030 
3031       if (FormatTok->isOneOf(tok::semi, tok::comma))
3032         return;
3033 
3034       nextToken();
3035     } while (!eof());
3036   }
3037 
3038   if (Style.Language != FormatStyle::LK_Java)
3039     return;
3040 
3041   // In Java, we can parse everything up to the parens, which aren't optional.
3042   do {
3043     // There should not be a ;, { or } before the new's open paren.
3044     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3045       return;
3046 
3047     // Consume the parens.
3048     if (FormatTok->is(tok::l_paren)) {
3049       parseParens();
3050 
3051       // If there is a class body of an anonymous class, consume that as child.
3052       if (FormatTok->is(tok::l_brace))
3053         parseChildBlock();
3054       return;
3055     }
3056     nextToken();
3057   } while (!eof());
3058 }
3059 
3060 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3061   keepAncestorBraces();
3062 
3063   if (isBlockBegin(*FormatTok)) {
3064     if (!KeepBraces)
3065       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3066     FormatToken *LeftBrace = FormatTok;
3067     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3068     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3069                /*MunchSemi=*/true, KeepBraces);
3070     if (!KeepBraces) {
3071       assert(!NestedTooDeep.empty());
3072       if (!NestedTooDeep.back())
3073         markOptionalBraces(LeftBrace);
3074     }
3075     if (WrapRightBrace)
3076       addUnwrappedLine();
3077   } else {
3078     parseUnbracedBody();
3079   }
3080 
3081   if (!KeepBraces)
3082     NestedTooDeep.pop_back();
3083 }
3084 
3085 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3086   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3087           (Style.isVerilog() &&
3088            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3089                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3090                               Keywords.kw_final, Keywords.kw_initial,
3091                               Keywords.kw_foreach, Keywords.kw_forever,
3092                               Keywords.kw_repeat))) &&
3093          "'for', 'while' or foreach macro expected");
3094   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3095                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3096 
3097   nextToken();
3098   // JS' for await ( ...
3099   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3100     nextToken();
3101   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3102     nextToken();
3103   if (HasParens && FormatTok->is(tok::l_paren)) {
3104     // The type is only set for Verilog basically because we were afraid to
3105     // change the existing behavior for loops. See the discussion on D121756 for
3106     // details.
3107     if (Style.isVerilog())
3108       FormatTok->setFinalizedType(TT_ConditionLParen);
3109     parseParens();
3110   }
3111   // Event control.
3112   if (Style.isVerilog())
3113     parseVerilogSensitivityList();
3114 
3115   handleAttributes();
3116   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3117 }
3118 
3119 void UnwrappedLineParser::parseDoWhile() {
3120   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3121   nextToken();
3122 
3123   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3124 
3125   // FIXME: Add error handling.
3126   if (!FormatTok->is(tok::kw_while)) {
3127     addUnwrappedLine();
3128     return;
3129   }
3130 
3131   // If in Whitesmiths mode, the line with the while() needs to be indented
3132   // to the same level as the block.
3133   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3134     ++Line->Level;
3135 
3136   nextToken();
3137   parseStructuralElement();
3138 }
3139 
3140 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3141   nextToken();
3142   unsigned OldLineLevel = Line->Level;
3143   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3144     --Line->Level;
3145   if (LeftAlignLabel)
3146     Line->Level = 0;
3147 
3148   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3149       FormatTok->is(tok::l_brace)) {
3150 
3151     CompoundStatementIndenter Indenter(this, Line->Level,
3152                                        Style.BraceWrapping.AfterCaseLabel,
3153                                        Style.BraceWrapping.IndentBraces);
3154     parseBlock();
3155     if (FormatTok->is(tok::kw_break)) {
3156       if (Style.BraceWrapping.AfterControlStatement ==
3157           FormatStyle::BWACS_Always) {
3158         addUnwrappedLine();
3159         if (!Style.IndentCaseBlocks &&
3160             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3161           ++Line->Level;
3162         }
3163       }
3164       parseStructuralElement();
3165     }
3166     addUnwrappedLine();
3167   } else {
3168     if (FormatTok->is(tok::semi))
3169       nextToken();
3170     addUnwrappedLine();
3171   }
3172   Line->Level = OldLineLevel;
3173   if (FormatTok->isNot(tok::l_brace)) {
3174     parseStructuralElement();
3175     addUnwrappedLine();
3176   }
3177 }
3178 
3179 void UnwrappedLineParser::parseCaseLabel() {
3180   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3181 
3182   // FIXME: fix handling of complex expressions here.
3183   do {
3184     nextToken();
3185     if (FormatTok->is(tok::colon)) {
3186       FormatTok->setFinalizedType(TT_CaseLabelColon);
3187       break;
3188     }
3189   } while (!eof());
3190   parseLabel();
3191 }
3192 
3193 void UnwrappedLineParser::parseSwitch() {
3194   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3195   nextToken();
3196   if (FormatTok->is(tok::l_paren))
3197     parseParens();
3198 
3199   keepAncestorBraces();
3200 
3201   if (FormatTok->is(tok::l_brace)) {
3202     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3203     parseBlock();
3204     addUnwrappedLine();
3205   } else {
3206     addUnwrappedLine();
3207     ++Line->Level;
3208     parseStructuralElement();
3209     --Line->Level;
3210   }
3211 
3212   if (Style.RemoveBracesLLVM)
3213     NestedTooDeep.pop_back();
3214 }
3215 
3216 // Operators that can follow a C variable.
3217 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3218   switch (kind) {
3219   case tok::ampamp:
3220   case tok::ampequal:
3221   case tok::arrow:
3222   case tok::caret:
3223   case tok::caretequal:
3224   case tok::comma:
3225   case tok::ellipsis:
3226   case tok::equal:
3227   case tok::equalequal:
3228   case tok::exclaim:
3229   case tok::exclaimequal:
3230   case tok::greater:
3231   case tok::greaterequal:
3232   case tok::greatergreater:
3233   case tok::greatergreaterequal:
3234   case tok::l_paren:
3235   case tok::l_square:
3236   case tok::less:
3237   case tok::lessequal:
3238   case tok::lessless:
3239   case tok::lesslessequal:
3240   case tok::minus:
3241   case tok::minusequal:
3242   case tok::minusminus:
3243   case tok::percent:
3244   case tok::percentequal:
3245   case tok::period:
3246   case tok::pipe:
3247   case tok::pipeequal:
3248   case tok::pipepipe:
3249   case tok::plus:
3250   case tok::plusequal:
3251   case tok::plusplus:
3252   case tok::question:
3253   case tok::r_brace:
3254   case tok::r_paren:
3255   case tok::r_square:
3256   case tok::semi:
3257   case tok::slash:
3258   case tok::slashequal:
3259   case tok::star:
3260   case tok::starequal:
3261     return true;
3262   default:
3263     return false;
3264   }
3265 }
3266 
3267 void UnwrappedLineParser::parseAccessSpecifier() {
3268   FormatToken *AccessSpecifierCandidate = FormatTok;
3269   nextToken();
3270   // Understand Qt's slots.
3271   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3272     nextToken();
3273   // Otherwise, we don't know what it is, and we'd better keep the next token.
3274   if (FormatTok->is(tok::colon)) {
3275     nextToken();
3276     addUnwrappedLine();
3277   } else if (!FormatTok->is(tok::coloncolon) &&
3278              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3279     // Not a variable name nor namespace name.
3280     addUnwrappedLine();
3281   } else if (AccessSpecifierCandidate) {
3282     // Consider the access specifier to be a C identifier.
3283     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3284   }
3285 }
3286 
3287 /// \brief Parses a requires, decides if it is a clause or an expression.
3288 /// \pre The current token has to be the requires keyword.
3289 /// \returns true if it parsed a clause.
3290 bool clang::format::UnwrappedLineParser::parseRequires() {
3291   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3292   auto RequiresToken = FormatTok;
3293 
3294   // We try to guess if it is a requires clause, or a requires expression. For
3295   // that we first consume the keyword and check the next token.
3296   nextToken();
3297 
3298   switch (FormatTok->Tok.getKind()) {
3299   case tok::l_brace:
3300     // This can only be an expression, never a clause.
3301     parseRequiresExpression(RequiresToken);
3302     return false;
3303   case tok::l_paren:
3304     // Clauses and expression can start with a paren, it's unclear what we have.
3305     break;
3306   default:
3307     // All other tokens can only be a clause.
3308     parseRequiresClause(RequiresToken);
3309     return true;
3310   }
3311 
3312   // Looking forward we would have to decide if there are function declaration
3313   // like arguments to the requires expression:
3314   // requires (T t) {
3315   // Or there is a constraint expression for the requires clause:
3316   // requires (C<T> && ...
3317 
3318   // But first let's look behind.
3319   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3320 
3321   if (!PreviousNonComment ||
3322       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3323     // If there is no token, or an expression left brace, we are a requires
3324     // clause within a requires expression.
3325     parseRequiresClause(RequiresToken);
3326     return true;
3327   }
3328 
3329   switch (PreviousNonComment->Tok.getKind()) {
3330   case tok::greater:
3331   case tok::r_paren:
3332   case tok::kw_noexcept:
3333   case tok::kw_const:
3334     // This is a requires clause.
3335     parseRequiresClause(RequiresToken);
3336     return true;
3337   case tok::amp:
3338   case tok::ampamp: {
3339     // This can be either:
3340     // if (... && requires (T t) ...)
3341     // Or
3342     // void member(...) && requires (C<T> ...
3343     // We check the one token before that for a const:
3344     // void member(...) const && requires (C<T> ...
3345     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3346     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3347       parseRequiresClause(RequiresToken);
3348       return true;
3349     }
3350     break;
3351   }
3352   default:
3353     if (PreviousNonComment->isTypeOrIdentifier()) {
3354       // This is a requires clause.
3355       parseRequiresClause(RequiresToken);
3356       return true;
3357     }
3358     // It's an expression.
3359     parseRequiresExpression(RequiresToken);
3360     return false;
3361   }
3362 
3363   // Now we look forward and try to check if the paren content is a parameter
3364   // list. The parameters can be cv-qualified and contain references or
3365   // pointers.
3366   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3367   // of stuff: typename, const, *, &, &&, ::, identifiers.
3368 
3369   unsigned StoredPosition = Tokens->getPosition();
3370   FormatToken *NextToken = Tokens->getNextToken();
3371   int Lookahead = 0;
3372   auto PeekNext = [&Lookahead, &NextToken, this] {
3373     ++Lookahead;
3374     NextToken = Tokens->getNextToken();
3375   };
3376 
3377   bool FoundType = false;
3378   bool LastWasColonColon = false;
3379   int OpenAngles = 0;
3380 
3381   for (; Lookahead < 50; PeekNext()) {
3382     switch (NextToken->Tok.getKind()) {
3383     case tok::kw_volatile:
3384     case tok::kw_const:
3385     case tok::comma:
3386       FormatTok = Tokens->setPosition(StoredPosition);
3387       parseRequiresExpression(RequiresToken);
3388       return false;
3389     case tok::r_paren:
3390     case tok::pipepipe:
3391       FormatTok = Tokens->setPosition(StoredPosition);
3392       parseRequiresClause(RequiresToken);
3393       return true;
3394     case tok::eof:
3395       // Break out of the loop.
3396       Lookahead = 50;
3397       break;
3398     case tok::coloncolon:
3399       LastWasColonColon = true;
3400       break;
3401     case tok::identifier:
3402       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3403         FormatTok = Tokens->setPosition(StoredPosition);
3404         parseRequiresExpression(RequiresToken);
3405         return false;
3406       }
3407       FoundType = true;
3408       LastWasColonColon = false;
3409       break;
3410     case tok::less:
3411       ++OpenAngles;
3412       break;
3413     case tok::greater:
3414       --OpenAngles;
3415       break;
3416     default:
3417       if (NextToken->isSimpleTypeSpecifier()) {
3418         FormatTok = Tokens->setPosition(StoredPosition);
3419         parseRequiresExpression(RequiresToken);
3420         return false;
3421       }
3422       break;
3423     }
3424   }
3425   // This seems to be a complicated expression, just assume it's a clause.
3426   FormatTok = Tokens->setPosition(StoredPosition);
3427   parseRequiresClause(RequiresToken);
3428   return true;
3429 }
3430 
3431 /// \brief Parses a requires clause.
3432 /// \param RequiresToken The requires keyword token, which starts this clause.
3433 /// \pre We need to be on the next token after the requires keyword.
3434 /// \sa parseRequiresExpression
3435 ///
3436 /// Returns if it either has finished parsing the clause, or it detects, that
3437 /// the clause is incorrect.
3438 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3439   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3440   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3441 
3442   // If there is no previous token, we are within a requires expression,
3443   // otherwise we will always have the template or function declaration in front
3444   // of it.
3445   bool InRequiresExpression =
3446       !RequiresToken->Previous ||
3447       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3448 
3449   RequiresToken->setFinalizedType(InRequiresExpression
3450                                       ? TT_RequiresClauseInARequiresExpression
3451                                       : TT_RequiresClause);
3452 
3453   // NOTE: parseConstraintExpression is only ever called from this function.
3454   // It could be inlined into here.
3455   parseConstraintExpression();
3456 
3457   if (!InRequiresExpression)
3458     FormatTok->Previous->ClosesRequiresClause = true;
3459 }
3460 
3461 /// \brief Parses a requires expression.
3462 /// \param RequiresToken The requires keyword token, which starts this clause.
3463 /// \pre We need to be on the next token after the requires keyword.
3464 /// \sa parseRequiresClause
3465 ///
3466 /// Returns if it either has finished parsing the expression, or it detects,
3467 /// that the expression is incorrect.
3468 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3469   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3470   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3471 
3472   RequiresToken->setFinalizedType(TT_RequiresExpression);
3473 
3474   if (FormatTok->is(tok::l_paren)) {
3475     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3476     parseParens();
3477   }
3478 
3479   if (FormatTok->is(tok::l_brace)) {
3480     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3481     parseChildBlock(/*CanContainBracedList=*/false,
3482                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3483   }
3484 }
3485 
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
///
/// Each loop iteration dispatches on the kind of the current token and either
/// consumes one piece of the expression or returns because the token cannot
/// continue it. The loop also ends at end of file.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Read the permission granted by the previous iteration and reset it, so
    // that a case below has to grant it again explicitly.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression; step past the keyword first, as
      // parseRequiresExpression expects.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      // A parenthesis where none is expected is not part of the constraint.
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // A '[' is only accepted here as the start of a lambda.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    // Tokens that end the constraint expression.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // A logical operator is followed by a new operand, so both a lambda and
      // a parenthesized operand are possible again.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry this iteration's lambda permission over to the next one.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      // After one of these, a lambda or a parenthesized operand may follow.
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // After one of these, a top-level '(' is no longer accepted.
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // The cast keyword has to be followed by '<'; otherwise give up.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      // Skip the '<' and parse the following tokens with '>' as the closing
      // token kind.
      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // Any other predecessor means the identifier is not part of the
        // constraint expression.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3635 
/// Parses an enum declaration or definition starting at the current token.
///
/// \returns false if the tokens turn out not to be an enum: a
/// JavaScript/TypeScript property or a protobuf field named "enum", or two
/// consecutive identifiers in C++. Returns true otherwise, including for a
/// declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  // Remembered for the ShouldBreakBeforeBrace() query further down.
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume everything between the keyword(s) and the opening brace: names,
  // the base type, template-like brackets, macros and attributes.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as a regular declaration block.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // When short enums may not stay on a single line, the brace can be moved to
  // its own unwrapped line here.
  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // End the line after '{' and parse the body one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  // HasError is the negated result of parseBracedList().
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  // Undo the level increase from above.
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // On failure, finish the current line here (swallowing a ';' if present).
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3729 
3730 bool UnwrappedLineParser::parseStructLike() {
3731   // parseRecord falls through and does not yet add an unwrapped line as a
3732   // record declaration or definition can start a structural element.
3733   parseRecord();
3734   // This does not apply to Java, JavaScript and C#.
3735   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3736       Style.isCSharp()) {
3737     if (FormatTok->is(tok::semi))
3738       nextToken();
3739     addUnwrappedLine();
3740     return true;
3741   }
3742   return false;
3743 }
3744 
3745 namespace {
3746 // A class used to set and restore the Token position when peeking
3747 // ahead in the token source.
3748 class ScopedTokenPosition {
3749   unsigned StoredPosition;
3750   FormatTokenSource *Tokens;
3751 
3752 public:
3753   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3754     assert(Tokens && "Tokens expected to not be null");
3755     StoredPosition = Tokens->getPosition();
3756   }
3757 
3758   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3759 };
3760 } // namespace
3761 
3762 // Look to see if we have [[ by looking ahead, if
3763 // its not then rewind to the original position.
3764 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3765   ScopedTokenPosition AutoPosition(Tokens);
3766   FormatToken *Tok = Tokens->getNextToken();
3767   // We already read the first [ check for the second.
3768   if (!Tok->is(tok::l_square))
3769     return false;
3770   // Double check that the attribute is just something
3771   // fairly simple.
3772   while (Tok->isNot(tok::eof)) {
3773     if (Tok->is(tok::r_square))
3774       break;
3775     Tok = Tokens->getNextToken();
3776   }
3777   if (Tok->is(tok::eof))
3778     return false;
3779   Tok = Tokens->getNextToken();
3780   if (!Tok->is(tok::r_square))
3781     return false;
3782   Tok = Tokens->getNextToken();
3783   if (Tok->is(tok::semi))
3784     return false;
3785   return true;
3786 }
3787 
3788 void UnwrappedLineParser::parseJavaEnumBody() {
3789   assert(FormatTok->is(tok::l_brace));
3790   const FormatToken *OpeningBrace = FormatTok;
3791 
3792   // Determine whether the enum is simple, i.e. does not have a semicolon or
3793   // constants with class bodies. Simple enums can be formatted like braced
3794   // lists, contracted to a single line, etc.
3795   unsigned StoredPosition = Tokens->getPosition();
3796   bool IsSimple = true;
3797   FormatToken *Tok = Tokens->getNextToken();
3798   while (!Tok->is(tok::eof)) {
3799     if (Tok->is(tok::r_brace))
3800       break;
3801     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3802       IsSimple = false;
3803       break;
3804     }
3805     // FIXME: This will also mark enums with braces in the arguments to enum
3806     // constants as "not simple". This is probably fine in practice, though.
3807     Tok = Tokens->getNextToken();
3808   }
3809   FormatTok = Tokens->setPosition(StoredPosition);
3810 
3811   if (IsSimple) {
3812     nextToken();
3813     parseBracedList();
3814     addUnwrappedLine();
3815     return;
3816   }
3817 
3818   // Parse the body of a more complex enum.
3819   // First add a line for everything up to the "{".
3820   nextToken();
3821   addUnwrappedLine();
3822   ++Line->Level;
3823 
3824   // Parse the enum constants.
3825   while (!eof()) {
3826     if (FormatTok->is(tok::l_brace)) {
3827       // Parse the constant's class body.
3828       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3829                  /*MunchSemi=*/false);
3830     } else if (FormatTok->is(tok::l_paren)) {
3831       parseParens();
3832     } else if (FormatTok->is(tok::comma)) {
3833       nextToken();
3834       addUnwrappedLine();
3835     } else if (FormatTok->is(tok::semi)) {
3836       nextToken();
3837       addUnwrappedLine();
3838       break;
3839     } else if (FormatTok->is(tok::r_brace)) {
3840       addUnwrappedLine();
3841       break;
3842     } else {
3843       nextToken();
3844     }
3845   }
3846 
3847   // Parse the class body after the enum's ";" if any.
3848   parseLevel(OpeningBrace);
3849   nextToken();
3850   --Line->Level;
3851   addUnwrappedLine();
3852 }
3853 
/// Parses a record (class, struct, union or similar) starting at its
/// introducing keyword, up to and including the body if there is one.
///
/// \param ParseAsExpr If true, the body is parsed with parseChildBlock()
/// instead of parseBlock().
///
/// No unwrapped line is added at the end; see the comment at the bottom.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remembered to pick the brace type and for ShouldBreakBeforeBrace() below.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier whose text is not identical to its upper-cased form is
    // not treated as a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Consume tokens one at a time until the record body, a ';' or the end of
    // the file is reached. Note that 'continue' inside this do-while skips the
    // nextToken() at the bottom and re-evaluates the loop condition.
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends this loop; it is handled as
        // the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A ';' means there is no body to parse.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# generic type constraint starts on a new unwrapped line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token type used for its opening brace.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is parsed two levels deeper
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3959 
3960 void UnwrappedLineParser::parseObjCMethod() {
3961   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3962          "'(' or identifier expected.");
3963   do {
3964     if (FormatTok->is(tok::semi)) {
3965       nextToken();
3966       addUnwrappedLine();
3967       return;
3968     } else if (FormatTok->is(tok::l_brace)) {
3969       if (Style.BraceWrapping.AfterFunction)
3970         addUnwrappedLine();
3971       parseBlock();
3972       addUnwrappedLine();
3973       return;
3974     } else {
3975       nextToken();
3976     }
3977   } while (!eof());
3978 }
3979 
3980 void UnwrappedLineParser::parseObjCProtocolList() {
3981   assert(FormatTok->is(tok::less) && "'<' expected.");
3982   do {
3983     nextToken();
3984     // Early exit in case someone forgot a close angle.
3985     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3986         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3987       return;
3988     }
3989   } while (!eof() && FormatTok->isNot(tok::greater));
3990   nextToken(); // Skip '>'.
3991 }
3992 
3993 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3994   do {
3995     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3996       nextToken();
3997       addUnwrappedLine();
3998       break;
3999     }
4000     if (FormatTok->is(tok::l_brace)) {
4001       parseBlock();
4002       // In ObjC interfaces, nothing should be following the "}".
4003       addUnwrappedLine();
4004     } else if (FormatTok->is(tok::r_brace)) {
4005       // Ignore stray "}". parseStructuralElement doesn't consume them.
4006       nextToken();
4007       addUnwrappedLine();
4008     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4009       nextToken();
4010       parseObjCMethod();
4011     } else {
4012       parseStructuralElement();
4013     }
4014   } while (!eof());
4015 }
4016 
4017 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4018   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4019          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4020   nextToken();
4021   nextToken(); // interface name
4022 
4023   // @interface can be followed by a lightweight generic
4024   // specialization list, then either a base class or a category.
4025   if (FormatTok->is(tok::less))
4026     parseObjCLightweightGenerics();
4027   if (FormatTok->is(tok::colon)) {
4028     nextToken();
4029     nextToken(); // base class name
4030     // The base class can also have lightweight generics applied to it.
4031     if (FormatTok->is(tok::less))
4032       parseObjCLightweightGenerics();
4033   } else if (FormatTok->is(tok::l_paren)) {
4034     // Skip category, if present.
4035     parseParens();
4036   }
4037 
4038   if (FormatTok->is(tok::less))
4039     parseObjCProtocolList();
4040 
4041   if (FormatTok->is(tok::l_brace)) {
4042     if (Style.BraceWrapping.AfterObjCDeclaration)
4043       addUnwrappedLine();
4044     parseBlock(/*MustBeDeclaration=*/true);
4045   }
4046 
4047   // With instance variables, this puts '}' on its own line.  Without instance
4048   // variables, this ends the @interface line.
4049   addUnwrappedLine();
4050 
4051   parseObjCUntilAtEnd();
4052 }
4053 
4054 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4055   assert(FormatTok->is(tok::less));
4056   // Unlike protocol lists, generic parameterizations support
4057   // nested angles:
4058   //
4059   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4060   //     NSObject <NSCopying, NSSecureCoding>
4061   //
4062   // so we need to count how many open angles we have left.
4063   unsigned NumOpenAngles = 1;
4064   do {
4065     nextToken();
4066     // Early exit in case someone forgot a close angle.
4067     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4068         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4069       break;
4070     }
4071     if (FormatTok->is(tok::less)) {
4072       ++NumOpenAngles;
4073     } else if (FormatTok->is(tok::greater)) {
4074       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4075       --NumOpenAngles;
4076     }
4077   } while (!eof() && NumOpenAngles != 0);
4078   nextToken(); // Skip '>'.
4079 }
4080 
4081 // Returns true for the declaration/definition form of @protocol,
4082 // false for the expression form.
4083 bool UnwrappedLineParser::parseObjCProtocol() {
4084   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4085   nextToken();
4086 
4087   if (FormatTok->is(tok::l_paren)) {
4088     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4089     return false;
4090   }
4091 
4092   // The definition/declaration form,
4093   // @protocol Foo
4094   // - (int)someMethod;
4095   // @end
4096 
4097   nextToken(); // protocol name
4098 
4099   if (FormatTok->is(tok::less))
4100     parseObjCProtocolList();
4101 
4102   // Check for protocol declaration.
4103   if (FormatTok->is(tok::semi)) {
4104     nextToken();
4105     addUnwrappedLine();
4106     return true;
4107   }
4108 
4109   addUnwrappedLine();
4110   parseObjCUntilAtEnd();
4111   return true;
4112 }
4113 
4114 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4115   bool IsImport = FormatTok->is(Keywords.kw_import);
4116   assert(IsImport || FormatTok->is(tok::kw_export));
4117   nextToken();
4118 
4119   // Consume the "default" in "export default class/function".
4120   if (FormatTok->is(tok::kw_default))
4121     nextToken();
4122 
4123   // Consume "async function", "function" and "default function", so that these
4124   // get parsed as free-standing JS functions, i.e. do not require a trailing
4125   // semicolon.
4126   if (FormatTok->is(Keywords.kw_async))
4127     nextToken();
4128   if (FormatTok->is(Keywords.kw_function)) {
4129     nextToken();
4130     return;
4131   }
4132 
4133   // For imports, `export *`, `export {...}`, consume the rest of the line up
4134   // to the terminating `;`. For everything else, just return and continue
4135   // parsing the structural element, i.e. the declaration or expression for
4136   // `export default`.
4137   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4138       !FormatTok->isStringLiteral() &&
4139       !(FormatTok->is(Keywords.kw_type) &&
4140         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4141     return;
4142   }
4143 
4144   while (!eof()) {
4145     if (FormatTok->is(tok::semi))
4146       return;
4147     if (Line->Tokens.empty()) {
4148       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4149       // import statement should terminate.
4150       return;
4151     }
4152     if (FormatTok->is(tok::l_brace)) {
4153       FormatTok->setBlockKind(BK_Block);
4154       nextToken();
4155       parseBracedList();
4156     } else {
4157       nextToken();
4158     }
4159   }
4160 }
4161 
4162 void UnwrappedLineParser::parseStatementMacro() {
4163   nextToken();
4164   if (FormatTok->is(tok::l_paren))
4165     parseParens();
4166   if (FormatTok->is(tok::semi))
4167     nextToken();
4168   addUnwrappedLine();
4169 }
4170 
4171 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4172   // consume things like a::`b.c[d:e] or a::*
4173   while (true) {
4174     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4175                            tok::coloncolon, tok::hash) ||
4176         Keywords.isVerilogIdentifier(*FormatTok)) {
4177       nextToken();
4178     } else if (FormatTok->is(tok::l_square)) {
4179       parseSquare();
4180     } else {
4181       break;
4182     }
4183   }
4184 }
4185 
4186 void UnwrappedLineParser::parseVerilogSensitivityList() {
4187   if (!FormatTok->is(tok::at))
4188     return;
4189   nextToken();
4190   // A block event expression has 2 at signs.
4191   if (FormatTok->is(tok::at))
4192     nextToken();
4193   switch (FormatTok->Tok.getKind()) {
4194   case tok::star:
4195     nextToken();
4196     break;
4197   case tok::l_paren:
4198     parseParens();
4199     break;
4200   default:
4201     parseVerilogHierarchyIdentifier();
4202     break;
4203   }
4204 }
4205 
4206 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4207   unsigned AddLevels = 0;
4208 
4209   if (FormatTok->is(Keywords.kw_clocking)) {
4210     nextToken();
4211     if (Keywords.isVerilogIdentifier(*FormatTok))
4212       nextToken();
4213     parseVerilogSensitivityList();
4214     if (FormatTok->is(tok::semi))
4215       nextToken();
4216   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4217                                 Keywords.kw_casez, Keywords.kw_randcase,
4218                                 Keywords.kw_randsequence)) {
4219     if (Style.IndentCaseLabels)
4220       AddLevels++;
4221     nextToken();
4222     if (FormatTok->is(tok::l_paren)) {
4223       FormatTok->setFinalizedType(TT_ConditionLParen);
4224       parseParens();
4225     }
4226     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4227       nextToken();
4228     // The case header has no semicolon.
4229   } else {
4230     // "module" etc.
4231     nextToken();
4232     // all the words like the name of the module and specifiers like
4233     // "automatic" and the width of function return type
4234     while (true) {
4235       if (FormatTok->is(tok::l_square)) {
4236         auto Prev = FormatTok->getPreviousNonComment();
4237         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4238           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4239         parseSquare();
4240       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4241                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4242         nextToken();
4243       } else {
4244         break;
4245       }
4246     }
4247 
4248     auto NewLine = [this]() {
4249       addUnwrappedLine();
4250       Line->IsContinuation = true;
4251     };
4252 
4253     // package imports
4254     while (FormatTok->is(Keywords.kw_import)) {
4255       NewLine();
4256       nextToken();
4257       parseVerilogHierarchyIdentifier();
4258       if (FormatTok->is(tok::semi))
4259         nextToken();
4260     }
4261 
4262     // parameters and ports
4263     if (FormatTok->is(Keywords.kw_verilogHash)) {
4264       NewLine();
4265       nextToken();
4266       if (FormatTok->is(tok::l_paren)) {
4267         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4268         parseParens();
4269       }
4270     }
4271     if (FormatTok->is(tok::l_paren)) {
4272       NewLine();
4273       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4274       parseParens();
4275     }
4276 
4277     // extends and implements
4278     if (FormatTok->is(Keywords.kw_extends)) {
4279       NewLine();
4280       nextToken();
4281       parseVerilogHierarchyIdentifier();
4282       if (FormatTok->is(tok::l_paren))
4283         parseParens();
4284     }
4285     if (FormatTok->is(Keywords.kw_implements)) {
4286       NewLine();
4287       do {
4288         nextToken();
4289         parseVerilogHierarchyIdentifier();
4290       } while (FormatTok->is(tok::comma));
4291     }
4292 
4293     // Coverage event for cover groups.
4294     if (FormatTok->is(tok::at)) {
4295       NewLine();
4296       parseVerilogSensitivityList();
4297     }
4298 
4299     if (FormatTok->is(tok::semi))
4300       nextToken(/*LevelDifference=*/1);
4301     addUnwrappedLine();
4302   }
4303 
4304   return AddLevels;
4305 }
4306 
4307 void UnwrappedLineParser::parseVerilogTable() {
4308   assert(FormatTok->is(Keywords.kw_table));
4309   nextToken(/*LevelDifference=*/1);
4310   addUnwrappedLine();
4311 
4312   auto InitialLevel = Line->Level++;
4313   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4314     FormatToken *Tok = FormatTok;
4315     nextToken();
4316     if (Tok->is(tok::semi))
4317       addUnwrappedLine();
4318     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4319       Tok->setFinalizedType(TT_VerilogTableItem);
4320   }
4321   Line->Level = InitialLevel;
4322   nextToken(/*LevelDifference=*/-1);
4323   addUnwrappedLine();
4324 }
4325 
4326 void UnwrappedLineParser::parseVerilogCaseLabel() {
4327   // The label will get unindented in AnnotatingParser. If there are no leading
4328   // spaces, indent the rest here so that things inside the block will be
4329   // indented relative to things outside. We don't use parseLabel because we
4330   // don't know whether this colon is a label or a ternary expression at this
4331   // point.
4332   auto OrigLevel = Line->Level;
4333   auto FirstLine = CurrentLines->size();
4334   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4335     ++Line->Level;
4336   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4337     --Line->Level;
4338   parseStructuralElement();
4339   // Restore the indentation in both the new line and the line that has the
4340   // label.
4341   if (CurrentLines->size() > FirstLine)
4342     (*CurrentLines)[FirstLine].Level = OrigLevel;
4343   Line->Level = OrigLevel;
4344 }
4345 
4346 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4347   for (const auto &N : Line.Tokens) {
4348     if (N.Tok->MacroCtx)
4349       return true;
4350     for (const UnwrappedLine &Child : N.Children)
4351       if (containsExpansion(Child))
4352         return true;
4353   }
4354   return false;
4355 }
4356 
4357 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4358   if (Line->Tokens.empty())
4359     return;
4360   LLVM_DEBUG({
4361     if (!parsingPPDirective()) {
4362       llvm::dbgs() << "Adding unwrapped line:\n";
4363       printDebugInfo(*Line);
4364     }
4365   });
4366 
4367   // If this line closes a block when in Whitesmiths mode, remember that
4368   // information so that the level can be decreased after the line is added.
4369   // This has to happen after the addition of the line since the line itself
4370   // needs to be indented.
4371   bool ClosesWhitesmithsBlock =
4372       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4373       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4374 
4375   // If the current line was expanded from a macro call, we use it to
4376   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4377   // line and the unexpanded token stream.
4378   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4379     if (!Reconstruct)
4380       Reconstruct.emplace(Line->Level, Unexpanded);
4381     Reconstruct->addLine(*Line);
4382 
4383     // While the reconstructed unexpanded lines are stored in the normal
4384     // flow of lines, the expanded lines are stored on the side to be analyzed
4385     // in an extra step.
4386     CurrentExpandedLines.push_back(std::move(*Line));
4387 
4388     if (Reconstruct->finished()) {
4389       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4390       assert(!Reconstructed.Tokens.empty() &&
4391              "Reconstructed must at least contain the macro identifier.");
4392       assert(!parsingPPDirective());
4393       LLVM_DEBUG({
4394         llvm::dbgs() << "Adding unexpanded line:\n";
4395         printDebugInfo(Reconstructed);
4396       });
4397       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4398       Lines.push_back(std::move(Reconstructed));
4399       CurrentExpandedLines.clear();
4400       Reconstruct.reset();
4401     }
4402   } else {
4403     // At the top level we only get here when no unexpansion is going on, or
4404     // when conditional formatting led to unfinished macro reconstructions.
4405     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4406     CurrentLines->push_back(std::move(*Line));
4407   }
4408   Line->Tokens.clear();
4409   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4410   Line->FirstStartColumn = 0;
4411   Line->IsContinuation = false;
4412   Line->SeenDecltypeAuto = false;
4413 
4414   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4415     --Line->Level;
4416   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4417     CurrentLines->append(
4418         std::make_move_iterator(PreprocessorDirectives.begin()),
4419         std::make_move_iterator(PreprocessorDirectives.end()));
4420     PreprocessorDirectives.clear();
4421   }
4422   // Disconnect the current token from the last token on the previous line.
4423   FormatTok->Previous = nullptr;
4424 }
4425 
4426 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4427 
4428 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4429   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4430          FormatTok.NewlinesBefore > 0;
4431 }
4432 
4433 // Checks if \p FormatTok is a line comment that continues the line comment
4434 // section on \p Line.
4435 static bool
4436 continuesLineCommentSection(const FormatToken &FormatTok,
4437                             const UnwrappedLine &Line,
4438                             const llvm::Regex &CommentPragmasRegex) {
4439   if (Line.Tokens.empty())
4440     return false;
4441 
4442   StringRef IndentContent = FormatTok.TokenText;
4443   if (FormatTok.TokenText.startswith("//") ||
4444       FormatTok.TokenText.startswith("/*")) {
4445     IndentContent = FormatTok.TokenText.substr(2);
4446   }
4447   if (CommentPragmasRegex.match(IndentContent))
4448     return false;
4449 
4450   // If Line starts with a line comment, then FormatTok continues the comment
4451   // section if its original column is greater or equal to the original start
4452   // column of the line.
4453   //
4454   // Define the min column token of a line as follows: if a line ends in '{' or
4455   // contains a '{' followed by a line comment, then the min column token is
4456   // that '{'. Otherwise, the min column token of the line is the first token of
4457   // the line.
4458   //
4459   // If Line starts with a token other than a line comment, then FormatTok
4460   // continues the comment section if its original column is greater than the
4461   // original start column of the min column token of the line.
4462   //
4463   // For example, the second line comment continues the first in these cases:
4464   //
4465   // // first line
4466   // // second line
4467   //
4468   // and:
4469   //
4470   // // first line
4471   //  // second line
4472   //
4473   // and:
4474   //
4475   // int i; // first line
4476   //  // second line
4477   //
4478   // and:
4479   //
4480   // do { // first line
4481   //      // second line
4482   //   int i;
4483   // } while (true);
4484   //
4485   // and:
4486   //
4487   // enum {
4488   //   a, // first line
4489   //    // second line
4490   //   b
4491   // };
4492   //
4493   // The second line comment doesn't continue the first in these cases:
4494   //
4495   //   // first line
4496   //  // second line
4497   //
4498   // and:
4499   //
4500   // int i; // first line
4501   // // second line
4502   //
4503   // and:
4504   //
4505   // do { // first line
4506   //   // second line
4507   //   int i;
4508   // } while (true);
4509   //
4510   // and:
4511   //
4512   // enum {
4513   //   a, // first line
4514   //   // second line
4515   // };
4516   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4517 
4518   // Scan for '{//'. If found, use the column of '{' as a min column for line
4519   // comment section continuation.
4520   const FormatToken *PreviousToken = nullptr;
4521   for (const UnwrappedLineNode &Node : Line.Tokens) {
4522     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4523         isLineComment(*Node.Tok)) {
4524       MinColumnToken = PreviousToken;
4525       break;
4526     }
4527     PreviousToken = Node.Tok;
4528 
4529     // Grab the last newline preceding a token in this unwrapped line.
4530     if (Node.Tok->NewlinesBefore > 0)
4531       MinColumnToken = Node.Tok;
4532   }
4533   if (PreviousToken && PreviousToken->is(tok::l_brace))
4534     MinColumnToken = PreviousToken;
4535 
4536   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4537                               MinColumnToken);
4538 }
4539 
4540 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4541   bool JustComments = Line->Tokens.empty();
4542   for (FormatToken *Tok : CommentsBeforeNextToken) {
4543     // Line comments that belong to the same line comment section are put on the
4544     // same line since later we might want to reflow content between them.
4545     // Additional fine-grained breaking of line comment sections is controlled
4546     // by the class BreakableLineCommentSection in case it is desirable to keep
4547     // several line comment sections in the same unwrapped line.
4548     //
4549     // FIXME: Consider putting separate line comment sections as children to the
4550     // unwrapped line instead.
4551     Tok->ContinuesLineCommentSection =
4552         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4553     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4554       addUnwrappedLine();
4555     pushToken(Tok);
4556   }
4557   if (NewlineBeforeNext && JustComments)
4558     addUnwrappedLine();
4559   CommentsBeforeNextToken.clear();
4560 }
4561 
4562 void UnwrappedLineParser::nextToken(int LevelDifference) {
4563   if (eof())
4564     return;
4565   flushComments(isOnNewLine(*FormatTok));
4566   pushToken(FormatTok);
4567   FormatToken *Previous = FormatTok;
4568   if (!Style.isJavaScript())
4569     readToken(LevelDifference);
4570   else
4571     readTokenWithJavaScriptASI();
4572   FormatTok->Previous = Previous;
4573   if (Style.isVerilog()) {
4574     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4575     // keywords like `begin`, we can't treat them the same as left braces
4576     // because some contexts require one of them.  For example structs use
4577     // braces and if blocks use keywords, and a left brace can occur in an if
4578     // statement, but it is not a block.  For keywords like `end`, we simply
4579     // treat them the same as right braces.
4580     if (Keywords.isVerilogEnd(*FormatTok))
4581       FormatTok->Tok.setKind(tok::r_brace);
4582   }
4583 }
4584 
4585 void UnwrappedLineParser::distributeComments(
4586     const SmallVectorImpl<FormatToken *> &Comments,
4587     const FormatToken *NextTok) {
4588   // Whether or not a line comment token continues a line is controlled by
4589   // the method continuesLineCommentSection, with the following caveat:
4590   //
4591   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4592   // that each comment line from the trail is aligned with the next token, if
4593   // the next token exists. If a trail exists, the beginning of the maximal
4594   // trail is marked as a start of a new comment section.
4595   //
4596   // For example in this code:
4597   //
4598   // int a; // line about a
4599   //   // line 1 about b
4600   //   // line 2 about b
4601   //   int b;
4602   //
4603   // the two lines about b form a maximal trail, so there are two sections, the
4604   // first one consisting of the single comment "// line about a" and the
4605   // second one consisting of the next two comments.
4606   if (Comments.empty())
4607     return;
4608   bool ShouldPushCommentsInCurrentLine = true;
4609   bool HasTrailAlignedWithNextToken = false;
4610   unsigned StartOfTrailAlignedWithNextToken = 0;
4611   if (NextTok) {
4612     // We are skipping the first element intentionally.
4613     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4614       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4615         HasTrailAlignedWithNextToken = true;
4616         StartOfTrailAlignedWithNextToken = i;
4617       }
4618     }
4619   }
4620   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4621     FormatToken *FormatTok = Comments[i];
4622     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4623       FormatTok->ContinuesLineCommentSection = false;
4624     } else {
4625       FormatTok->ContinuesLineCommentSection =
4626           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4627     }
4628     if (!FormatTok->ContinuesLineCommentSection &&
4629         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4630       ShouldPushCommentsInCurrentLine = false;
4631     }
4632     if (ShouldPushCommentsInCurrentLine)
4633       pushToken(FormatTok);
4634     else
4635       CommentsBeforeNextToken.push_back(FormatTok);
4636   }
4637 }
4638 
// Reads tokens from the token source until a non-comment token is found and
// leaves it in FormatTok (at end of input, FormatTok is the last token read).
//
// Along the way this function
//  - consumes merge conflict marker tokens and feeds them to the conditional
//    compilation tracking,
//  - parses preprocessor directives that start on their own line,
//  - skips tokens inside unreachable preprocessor branches,
//  - replaces calls of configured macros with their expansion, and
//  - collects comments and hands them to distributeComments().
//
// \param LevelDifference added to Line->Level while a preprocessor directive
// encountered here is being parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are not kept as regular tokens: each one updates the
    // conditional compilation state, and the token read after it is forced
    // onto a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Computes whether Tok is the first non-comment token on its line. The
    // previous result is carried over only when the preceding token was a
    // comment.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // Parse every preprocessor directive that starts here. In Verilog a '#'
    // only starts a directive if the token after it is a Verilog preprocessor
    // directive keyword.
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           (!Style.isVerilog() ||
            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0) {
        Line->Level += PPBranchLevel;
      }
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the line-start state for the token that follows the
      // directive, since it may be another '#'.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Tokens inside an unreachable preprocessor branch are dropped, unless
    // they belong to a preprocessor directive.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    if (FormatTok->is(tok::identifier) &&
        Macros.defined(FormatTok->TokenText) &&
        // FIXME: Allow expanding macros in preprocessor directives.
        !Line->InPPDirective) {
      FormatToken *ID = FormatTok;
      // Remembered so that the token source can be rewound if the call turns
      // out not to match a definition.
      unsigned Position = Tokens->getPosition();

      // To correctly parse the code, we need to replace the tokens of the macro
      // call with its expansion.
      auto PreCall = std::move(Line);
      Line.reset(new UnwrappedLine);
      bool OldInExpansion = InExpansion;
      InExpansion = true;
      // We parse the macro call into a new line.
      auto Args = parseMacroCall();
      InExpansion = OldInExpansion;
      assert(Line->Tokens.front().Tok == ID);
      // And remember the unexpanded macro call tokens.
      auto UnexpandedLine = std::move(Line);
      // Reset to the old line.
      Line = std::move(PreCall);

      LLVM_DEBUG({
        llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
        if (Args) {
          llvm::dbgs() << "(";
          for (const auto &Arg : Args.value())
            for (const auto &T : Arg)
              llvm::dbgs() << T->TokenText << " ";
          llvm::dbgs() << ")";
        }
        llvm::dbgs() << "\n";
      });
      if (Macros.objectLike(ID->TokenText) && Args &&
          !Macros.hasArity(ID->TokenText, Args->size())) {
        // The macro is either
        // - object-like, but we got arguments, or
        // - overloaded to be both object-like and function-like, but none of
        //   the function-like arities match the number of arguments.
        // Thus, expand as object-like macro.
        LLVM_DEBUG(llvm::dbgs()
                   << "Macro \"" << ID->TokenText
                   << "\" not overloaded for arity " << Args->size()
                   << "or not function-like, using object-like overload.");
        Args.reset();
        // Keep only the macro identifier as the unexpanded call and re-read
        // the token stream from just after it.
        UnexpandedLine->Tokens.resize(1);
        Tokens->setPosition(Position);
        nextToken();
        assert(!Args && Macros.objectLike(ID->TokenText));
      }
      if ((!Args && Macros.objectLike(ID->TokenText)) ||
          (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
        // Next, we insert the expanded tokens in the token stream at the
        // current position, and continue parsing.
        Unexpanded[ID] = std::move(UnexpandedLine);
        SmallVector<FormatToken *, 8> Expansion =
            Macros.expand(ID, std::move(Args));
        // An empty expansion leaves FormatTok as it is.
        if (!Expansion.empty())
          FormatTok = Tokens->insertTokens(Expansion);

        LLVM_DEBUG({
          llvm::dbgs() << "Expanded: ";
          for (const auto &T : Expansion)
            llvm::dbgs() << T->TokenText << " ";
          llvm::dbgs() << "\n";
        });
      } else {
        LLVM_DEBUG({
          llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
                       << "\", because it was used ";
          if (Args)
            llvm::dbgs() << "with " << Args->size();
          else
            llvm::dbgs() << "without";
          llvm::dbgs() << " arguments, which doesn't match any definition.\n";
        });
        // No matching definition: rewind and treat the identifier as a
        // regular token.
        Tokens->setPosition(Position);
        FormatTok = ID;
      }
    }

    // A non-comment token ends the search; the comments gathered so far are
    // distributed relative to it.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // End of input reached: there is no next token to align the comments with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4803 
4804 namespace {
4805 template <typename Iterator>
4806 void pushTokens(Iterator Begin, Iterator End,
4807                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4808   for (auto I = Begin; I != End; ++I) {
4809     Into.push_back(I->Tok);
4810     for (const auto &Child : I->Children)
4811       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4812   }
4813 }
4814 } // namespace
4815 
4816 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4817 UnwrappedLineParser::parseMacroCall() {
4818   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4819   assert(Line->Tokens.empty());
4820   nextToken();
4821   if (!FormatTok->is(tok::l_paren))
4822     return Args;
4823   unsigned Position = Tokens->getPosition();
4824   FormatToken *Tok = FormatTok;
4825   nextToken();
4826   Args.emplace();
4827   auto ArgStart = std::prev(Line->Tokens.end());
4828 
4829   int Parens = 0;
4830   do {
4831     switch (FormatTok->Tok.getKind()) {
4832     case tok::l_paren:
4833       ++Parens;
4834       nextToken();
4835       break;
4836     case tok::r_paren: {
4837       if (Parens > 0) {
4838         --Parens;
4839         nextToken();
4840         break;
4841       }
4842       Args->push_back({});
4843       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4844       nextToken();
4845       return Args;
4846     }
4847     case tok::comma: {
4848       if (Parens > 0) {
4849         nextToken();
4850         break;
4851       }
4852       Args->push_back({});
4853       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4854       nextToken();
4855       ArgStart = std::prev(Line->Tokens.end());
4856       break;
4857     }
4858     default:
4859       nextToken();
4860       break;
4861     }
4862   } while (!eof());
4863   Line->Tokens.resize(1);
4864   Tokens->setPosition(Position);
4865   FormatTok = Tok;
4866   return {};
4867 }
4868 
4869 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4870   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4871   if (MustBreakBeforeNextToken) {
4872     Line->Tokens.back().Tok->MustBreakBefore = true;
4873     MustBreakBeforeNextToken = false;
4874   }
4875 }
4876 
4877 } // end namespace format
4878 } // end namespace clang
4879