1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
~ScopedDeclarationState()47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     FakeEOF.Tok.startToken();
87     FakeEOF.Tok.setKind(tok::eof);
88     TokenSource = this;
89     Line.Level = 0;
90     Line.InPPDirective = true;
91   }
92 
~ScopedMacroState()93   ~ScopedMacroState() override {
94     TokenSource = PreviousTokenSource;
95     ResetToken = Token;
96     Line.InPPDirective = false;
97     Line.Level = PreviousLineLevel;
98   }
99 
getNextToken()100   FormatToken *getNextToken() override {
101     // The \c UnwrappedLineParser guards against this by never calling
102     // \c getNextToken() after it has encountered the first eof token.
103     assert(!eof());
104     PreviousToken = Token;
105     Token = PreviousTokenSource->getNextToken();
106     if (eof())
107       return &FakeEOF;
108     return Token;
109   }
110 
getPosition()111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
setPosition(unsigned Position)113   FormatToken *setPosition(unsigned Position) override {
114     PreviousToken = nullptr;
115     Token = PreviousTokenSource->setPosition(Position);
116     return Token;
117   }
118 
119 private:
eof()120   bool eof() {
121     return Token && Token->HasUnescapedNewline &&
122            !continuesLineComment(*Token, PreviousToken,
123                                  /*MinColumnToken=*/PreviousToken);
124   }
125 
126   FormatToken FakeEOF;
127   UnwrappedLine &Line;
128   FormatTokenSource *&TokenSource;
129   FormatToken *&ResetToken;
130   unsigned PreviousLineLevel;
131   FormatTokenSource *PreviousTokenSource;
132 
133   FormatToken *Token;
134   FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141   ScopedLineState(UnwrappedLineParser &Parser,
142                   bool SwitchToPreprocessorLines = false)
143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144     if (SwitchToPreprocessorLines)
145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
146     else if (!Parser.Line->Tokens.empty())
147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148     PreBlockLine = std::move(Parser.Line);
149     Parser.Line = std::make_unique<UnwrappedLine>();
150     Parser.Line->Level = PreBlockLine->Level;
151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152   }
153 
~ScopedLineState()154   ~ScopedLineState() {
155     if (!Parser.Line->Tokens.empty()) {
156       Parser.addUnwrappedLine();
157     }
158     assert(Parser.Line->Tokens.empty());
159     Parser.Line = std::move(PreBlockLine);
160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161       Parser.MustBreakBeforeNextToken = true;
162     Parser.CurrentLines = OriginalLines;
163   }
164 
165 private:
166   UnwrappedLineParser &Parser;
167 
168   std::unique_ptr<UnwrappedLine> PreBlockLine;
169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                             const FormatStyle &Style, unsigned &LineLevel)
176       : CompoundStatementIndenter(Parser, LineLevel,
177                                   Style.BraceWrapping.AfterControlStatement,
178                                   Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
~CompoundStatementIndenter()187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
getNextToken()201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
getPosition()206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
setPosition(unsigned P)211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
reset()216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
reset()239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
parse()256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
parseFile()299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::l_brace:
331       return;
332     default:
333       if (FormatTok->is(Keywords.kw_where)) {
334         addUnwrappedLine();
335         nextToken();
336         parseCSharpGenericTypeConstraint();
337         break;
338       }
339       nextToken();
340       break;
341     }
342   } while (!eof());
343 }
344 
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346   int UnpairedSquareBrackets = 1;
347   do {
348     switch (FormatTok->Tok.getKind()) {
349     case tok::r_square:
350       nextToken();
351       --UnpairedSquareBrackets;
352       if (UnpairedSquareBrackets == 0) {
353         addUnwrappedLine();
354         return;
355       }
356       break;
357     case tok::l_square:
358       ++UnpairedSquareBrackets;
359       nextToken();
360       break;
361     default:
362       nextToken();
363       break;
364     }
365   } while (!eof());
366 }
367 
parseLevel(bool HasOpeningBrace)368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369   bool SwitchLabelEncountered = false;
370   do {
371     tok::TokenKind kind = FormatTok->Tok.getKind();
372     if (FormatTok->getType() == TT_MacroBlockBegin) {
373       kind = tok::l_brace;
374     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375       kind = tok::r_brace;
376     }
377 
378     switch (kind) {
379     case tok::comment:
380       nextToken();
381       addUnwrappedLine();
382       break;
383     case tok::l_brace:
384       // FIXME: Add parameter whether this can happen - if this happens, we must
385       // be in a non-declaration context.
386       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387         continue;
388       parseBlock(/*MustBeDeclaration=*/false);
389       addUnwrappedLine();
390       break;
391     case tok::r_brace:
392       if (HasOpeningBrace)
393         return;
394       nextToken();
395       addUnwrappedLine();
396       break;
397     case tok::kw_default: {
398       unsigned StoredPosition = Tokens->getPosition();
399       FormatToken *Next;
400       do {
401         Next = Tokens->getNextToken();
402       } while (Next && Next->is(tok::comment));
403       FormatTok = Tokens->setPosition(StoredPosition);
404       if (Next && Next->isNot(tok::colon)) {
405         // default not followed by ':' is not a case label; treat it like
406         // an identifier.
407         parseStructuralElement();
408         break;
409       }
410       // Else, if it is 'default:', fall through to the case handling.
411       LLVM_FALLTHROUGH;
412     }
413     case tok::kw_case:
414       if (Style.Language == FormatStyle::LK_JavaScript &&
415           Line->MustBeDeclaration) {
416         // A 'case: string' style field declaration.
417         parseStructuralElement();
418         break;
419       }
420       if (!SwitchLabelEncountered &&
421           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422         ++Line->Level;
423       SwitchLabelEncountered = true;
424       parseStructuralElement();
425       break;
426     case tok::l_square:
427       if (Style.isCSharp()) {
428         nextToken();
429         parseCSharpAttribute();
430         break;
431       }
432       LLVM_FALLTHROUGH;
433     default:
434       parseStructuralElement(/*IsTopLevel=*/true);
435       break;
436     }
437   } while (!eof());
438 }
439 
calculateBraceTypes(bool ExpectClassBody)440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441   // We'll parse forward through the tokens until we hit
442   // a closing brace or eof - note that getNextToken() will
443   // parse macros, so this will magically work inside macro
444   // definitions, too.
445   unsigned StoredPosition = Tokens->getPosition();
446   FormatToken *Tok = FormatTok;
447   const FormatToken *PrevTok = Tok->Previous;
448   // Keep a stack of positions of lbrace tokens. We will
449   // update information about whether an lbrace starts a
450   // braced init list or a different block during the loop.
451   SmallVector<FormatToken *, 8> LBraceStack;
452   assert(Tok->Tok.is(tok::l_brace));
453   do {
454     // Get next non-comment token.
455     FormatToken *NextTok;
456     unsigned ReadTokens = 0;
457     do {
458       NextTok = Tokens->getNextToken();
459       ++ReadTokens;
460     } while (NextTok->is(tok::comment));
461 
462     switch (Tok->Tok.getKind()) {
463     case tok::l_brace:
464       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465         if (PrevTok->isOneOf(tok::colon, tok::less))
466           // A ':' indicates this code is in a type, or a braced list
467           // following a label in an object literal ({a: {b: 1}}).
468           // A '<' could be an object used in a comparison, but that is nonsense
469           // code (can never return true), so more likely it is a generic type
470           // argument (`X<{a: string; b: number}>`).
471           // The code below could be confused by semicolons between the
472           // individual members in a type member list, which would normally
473           // trigger BK_Block. In both cases, this must be parsed as an inline
474           // braced init.
475           Tok->setBlockKind(BK_BracedInit);
476         else if (PrevTok->is(tok::r_paren))
477           // `) { }` can only occur in function or method declarations in JS.
478           Tok->setBlockKind(BK_Block);
479       } else {
480         Tok->setBlockKind(BK_Unknown);
481       }
482       LBraceStack.push_back(Tok);
483       break;
484     case tok::r_brace:
485       if (LBraceStack.empty())
486         break;
487       if (LBraceStack.back()->is(BK_Unknown)) {
488         bool ProbablyBracedList = false;
489         if (Style.Language == FormatStyle::LK_Proto) {
490           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491         } else {
492           // Using OriginalColumn to distinguish between ObjC methods and
493           // binary operators is a bit hacky.
494           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495                                   NextTok->OriginalColumn == 0;
496 
497           // If there is a comma, semicolon or right paren after the closing
498           // brace, we assume this is a braced initializer list.  Note that
499           // regardless how we mark inner braces here, we will overwrite the
500           // BlockKind later if we parse a braced list (where all blocks
501           // inside are by default braced lists), or when we explicitly detect
502           // blocks (for example while parsing lambdas).
503           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504           // braced list in JS.
505           ProbablyBracedList =
506               (Style.Language == FormatStyle::LK_JavaScript &&
507                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508                                 Keywords.kw_as)) ||
509               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511                                tok::r_paren, tok::r_square, tok::l_brace,
512                                tok::ellipsis) ||
513               (NextTok->is(tok::identifier) &&
514                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515               (NextTok->is(tok::semi) &&
516                (!ExpectClassBody || LBraceStack.size() != 1)) ||
517               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519             // We can have an array subscript after a braced init
520             // list, but C++11 attributes are expected after blocks.
521             NextTok = Tokens->getNextToken();
522             ++ReadTokens;
523             ProbablyBracedList = NextTok->isNot(tok::l_square);
524           }
525         }
526         if (ProbablyBracedList) {
527           Tok->setBlockKind(BK_BracedInit);
528           LBraceStack.back()->setBlockKind(BK_BracedInit);
529         } else {
530           Tok->setBlockKind(BK_Block);
531           LBraceStack.back()->setBlockKind(BK_Block);
532         }
533       }
534       LBraceStack.pop_back();
535       break;
536     case tok::identifier:
537       if (!Tok->is(TT_StatementMacro))
538         break;
539       LLVM_FALLTHROUGH;
540     case tok::at:
541     case tok::semi:
542     case tok::kw_if:
543     case tok::kw_while:
544     case tok::kw_for:
545     case tok::kw_switch:
546     case tok::kw_try:
547     case tok::kw___try:
548       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549         LBraceStack.back()->setBlockKind(BK_Block);
550       break;
551     default:
552       break;
553     }
554     PrevTok = Tok;
555     Tok = NextTok;
556   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557 
558   // Assume other blocks for all unclosed opening braces.
559   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560     if (LBraceStack[i]->is(BK_Unknown))
561       LBraceStack[i]->setBlockKind(BK_Block);
562   }
563 
564   FormatTok = Tokens->setPosition(StoredPosition);
565 }
566 
/// Mixes the \c std::hash of \p v into \p seed in place, combining it with
/// the constant 0x9e3779b9 and shifted copies of the previous seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
572 
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574   size_t h = 0;
575   for (const auto &i : PPStack) {
576     hash_combine(h, size_t(i.Kind));
577     hash_combine(h, i.Line);
578   }
579   return h;
580 }
581 
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool UnindentWhitesmithsBraces)582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
583                                      bool MunchSemi,
584                                      bool UnindentWhitesmithsBraces) {
585   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
586          "'{' or macro block token expected");
587   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
588   FormatTok->setBlockKind(BK_Block);
589 
590   // For Whitesmiths mode, jump to the next level prior to skipping over the
591   // braces.
592   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
593     ++Line->Level;
594 
595   size_t PPStartHash = computePPHash();
596 
597   unsigned InitialLevel = Line->Level;
598   nextToken(/*LevelDifference=*/AddLevels);
599 
600   if (MacroBlock && FormatTok->is(tok::l_paren))
601     parseParens();
602 
603   size_t NbPreprocessorDirectives =
604       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
605   addUnwrappedLine();
606   size_t OpeningLineIndex =
607       CurrentLines->empty()
608           ? (UnwrappedLine::kInvalidIndex)
609           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
610 
611   // Whitesmiths is weird here. The brace needs to be indented for the namespace
612   // block, but the block itself may not be indented depending on the style
613   // settings. This allows the format to back up one level in those cases.
614   if (UnindentWhitesmithsBraces)
615     --Line->Level;
616 
617   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
618                                           MustBeDeclaration);
619   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
620     Line->Level += AddLevels;
621   parseLevel(/*HasOpeningBrace=*/true);
622 
623   if (eof())
624     return;
625 
626   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
627                  : !FormatTok->is(tok::r_brace)) {
628     Line->Level = InitialLevel;
629     FormatTok->setBlockKind(BK_Block);
630     return;
631   }
632 
633   size_t PPEndHash = computePPHash();
634 
635   // Munch the closing brace.
636   nextToken(/*LevelDifference=*/-AddLevels);
637 
638   if (MacroBlock && FormatTok->is(tok::l_paren))
639     parseParens();
640 
641   if (FormatTok->is(tok::arrow)) {
642     // Following the } we can find a trailing return type arrow
643     // as part of an implicit conversion constraint.
644     nextToken();
645     parseStructuralElement();
646   }
647 
648   if (MunchSemi && FormatTok->Tok.is(tok::semi))
649     nextToken();
650 
651   Line->Level = InitialLevel;
652 
653   if (PPStartHash == PPEndHash) {
654     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
655     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
656       // Update the opening line to add the forward reference as well
657       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
658           CurrentLines->size() - 1;
659     }
660   }
661 }
662 
isGoogScope(const UnwrappedLine & Line)663 static bool isGoogScope(const UnwrappedLine &Line) {
664   // FIXME: Closure-library specific stuff should not be hard-coded but be
665   // configurable.
666   if (Line.Tokens.size() < 4)
667     return false;
668   auto I = Line.Tokens.begin();
669   if (I->Tok->TokenText != "goog")
670     return false;
671   ++I;
672   if (I->Tok->isNot(tok::period))
673     return false;
674   ++I;
675   if (I->Tok->TokenText != "scope")
676     return false;
677   ++I;
678   return I->Tok->is(tok::l_paren);
679 }
680 
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)681 static bool isIIFE(const UnwrappedLine &Line,
682                    const AdditionalKeywords &Keywords) {
683   // Look for the start of an immediately invoked anonymous function.
684   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
685   // This is commonly done in JavaScript to create a new, anonymous scope.
686   // Example: (function() { ... })()
687   if (Line.Tokens.size() < 3)
688     return false;
689   auto I = Line.Tokens.begin();
690   if (I->Tok->isNot(tok::l_paren))
691     return false;
692   ++I;
693   if (I->Tok->isNot(Keywords.kw_function))
694     return false;
695   ++I;
696   return I->Tok->is(tok::l_paren);
697 }
698 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)699 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
700                                    const FormatToken &InitialToken) {
701   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
702     return Style.BraceWrapping.AfterNamespace;
703   if (InitialToken.is(tok::kw_class))
704     return Style.BraceWrapping.AfterClass;
705   if (InitialToken.is(tok::kw_union))
706     return Style.BraceWrapping.AfterUnion;
707   if (InitialToken.is(tok::kw_struct))
708     return Style.BraceWrapping.AfterStruct;
709   return false;
710 }
711 
parseChildBlock()712 void UnwrappedLineParser::parseChildBlock() {
713   FormatTok->setBlockKind(BK_Block);
714   nextToken();
715   {
716     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
717                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
718     ScopedLineState LineState(*this);
719     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
720                                             /*MustBeDeclaration=*/false);
721     Line->Level += SkipIndent ? 0 : 1;
722     parseLevel(/*HasOpeningBrace=*/true);
723     flushComments(isOnNewLine(*FormatTok));
724     Line->Level -= SkipIndent ? 0 : 1;
725   }
726   nextToken();
727 }
728 
parsePPDirective()729 void UnwrappedLineParser::parsePPDirective() {
730   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
731   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
732 
733   nextToken();
734 
735   if (!FormatTok->Tok.getIdentifierInfo()) {
736     parsePPUnknown();
737     return;
738   }
739 
740   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
741   case tok::pp_define:
742     parsePPDefine();
743     return;
744   case tok::pp_if:
745     parsePPIf(/*IfDef=*/false);
746     break;
747   case tok::pp_ifdef:
748   case tok::pp_ifndef:
749     parsePPIf(/*IfDef=*/true);
750     break;
751   case tok::pp_else:
752     parsePPElse();
753     break;
754   case tok::pp_elifdef:
755   case tok::pp_elifndef:
756   case tok::pp_elif:
757     parsePPElIf();
758     break;
759   case tok::pp_endif:
760     parsePPEndIf();
761     break;
762   default:
763     parsePPUnknown();
764     break;
765   }
766 }
767 
conditionalCompilationCondition(bool Unreachable)768 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
769   size_t Line = CurrentLines->size();
770   if (CurrentLines == &PreprocessorDirectives)
771     Line += Lines.size();
772 
773   if (Unreachable ||
774       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
775     PPStack.push_back({PP_Unreachable, Line});
776   else
777     PPStack.push_back({PP_Conditional, Line});
778 }
779 
conditionalCompilationStart(bool Unreachable)780 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
781   ++PPBranchLevel;
782   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
783   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
784     PPLevelBranchIndex.push_back(0);
785     PPLevelBranchCount.push_back(0);
786   }
787   PPChainBranchIndex.push(0);
788   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
789   conditionalCompilationCondition(Unreachable || Skip);
790 }
791 
conditionalCompilationAlternative()792 void UnwrappedLineParser::conditionalCompilationAlternative() {
793   if (!PPStack.empty())
794     PPStack.pop_back();
795   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
796   if (!PPChainBranchIndex.empty())
797     ++PPChainBranchIndex.top();
798   conditionalCompilationCondition(
799       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
800       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
801 }
802 
conditionalCompilationEnd()803 void UnwrappedLineParser::conditionalCompilationEnd() {
804   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
805   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
806     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
807       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
808     }
809   }
810   // Guard against #endif's without #if.
811   if (PPBranchLevel > -1)
812     --PPBranchLevel;
813   if (!PPChainBranchIndex.empty())
814     PPChainBranchIndex.pop();
815   if (!PPStack.empty())
816     PPStack.pop_back();
817 }
818 
parsePPIf(bool IfDef)819 void UnwrappedLineParser::parsePPIf(bool IfDef) {
820   bool IfNDef = FormatTok->is(tok::pp_ifndef);
821   nextToken();
822   bool Unreachable = false;
823   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
824     Unreachable = true;
825   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
826     Unreachable = true;
827   conditionalCompilationStart(Unreachable);
828   FormatToken *IfCondition = FormatTok;
829   // If there's a #ifndef on the first line, and the only lines before it are
830   // comments, it could be an include guard.
831   bool MaybeIncludeGuard = IfNDef;
832   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
833     for (auto &Line : Lines) {
834       if (!Line.Tokens.front().Tok->is(tok::comment)) {
835         MaybeIncludeGuard = false;
836         IncludeGuard = IG_Rejected;
837         break;
838       }
839     }
840   --PPBranchLevel;
841   parsePPUnknown();
842   ++PPBranchLevel;
843   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
844     IncludeGuard = IG_IfNdefed;
845     IncludeGuardToken = IfCondition;
846   }
847 }
848 
parsePPElse()849 void UnwrappedLineParser::parsePPElse() {
850   // If a potential include guard has an #else, it's not an include guard.
851   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
852     IncludeGuard = IG_Rejected;
853   conditionalCompilationAlternative();
854   if (PPBranchLevel > -1)
855     --PPBranchLevel;
856   parsePPUnknown();
857   ++PPBranchLevel;
858 }
859 
parsePPElIf()860 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
861 
parsePPEndIf()862 void UnwrappedLineParser::parsePPEndIf() {
863   conditionalCompilationEnd();
864   parsePPUnknown();
865   // If the #endif of a potential include guard is the last thing in the file,
866   // then we found an include guard.
867   unsigned TokenPosition = Tokens->getPosition();
868   FormatToken *PeekNext = AllTokens[TokenPosition];
869   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
870       PeekNext->is(tok::eof) &&
871       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
872     IncludeGuard = IG_Found;
873 }
874 
parsePPDefine()875 void UnwrappedLineParser::parsePPDefine() {
876   nextToken();
877 
878   if (!FormatTok->Tok.getIdentifierInfo()) {
879     IncludeGuard = IG_Rejected;
880     IncludeGuardToken = nullptr;
881     parsePPUnknown();
882     return;
883   }
884 
885   if (IncludeGuard == IG_IfNdefed &&
886       IncludeGuardToken->TokenText == FormatTok->TokenText) {
887     IncludeGuard = IG_Defined;
888     IncludeGuardToken = nullptr;
889     for (auto &Line : Lines) {
890       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
891         IncludeGuard = IG_Rejected;
892         break;
893       }
894     }
895   }
896 
897   nextToken();
898   if (FormatTok->Tok.getKind() == tok::l_paren &&
899       FormatTok->WhitespaceRange.getBegin() ==
900           FormatTok->WhitespaceRange.getEnd()) {
901     parseParens();
902   }
903   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
904     Line->Level += PPBranchLevel + 1;
905   addUnwrappedLine();
906   ++Line->Level;
907 
908   // Errors during a preprocessor directive can only affect the layout of the
909   // preprocessor directive, and thus we ignore them. An alternative approach
910   // would be to use the same approach we use on the file level (no
911   // re-indentation if there was a structural error) within the macro
912   // definition.
913   parseFile();
914 }
915 
parsePPUnknown()916 void UnwrappedLineParser::parsePPUnknown() {
917   do {
918     nextToken();
919   } while (!eof());
920   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
921     Line->Level += PPBranchLevel + 1;
922   addUnwrappedLine();
923 }
924 
925 // Here we exclude certain tokens that are not usually the first token in an
926 // unwrapped line. This is used in attempt to distinguish macro calls without
927 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)928 static bool tokenCanStartNewLine(const FormatToken &Tok) {
929   // Semicolon can be a null-statement, l_square can be a start of a macro or
930   // a C++11 attribute, but this doesn't seem to be common.
931   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
932          Tok.isNot(TT_AttributeSquare) &&
933          // Tokens that can only be used as binary operators and a part of
934          // overloaded operator names.
935          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
936          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
937          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
938          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
939          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
940          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
941          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
942          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
943          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
944          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
945          Tok.isNot(tok::lesslessequal) &&
946          // Colon is used in labels, base class lists, initializer lists,
947          // range-based for loops, ternary operator, but should never be the
948          // first token in an unwrapped line.
949          Tok.isNot(tok::colon) &&
950          // 'noexcept' is a trailing annotation.
951          Tok.isNot(tok::kw_noexcept);
952 }
953 
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)954 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
955                           const FormatToken *FormatTok) {
956   // FIXME: This returns true for C/C++ keywords like 'struct'.
957   return FormatTok->is(tok::identifier) &&
958          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
959           !FormatTok->isOneOf(
960               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
961               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
962               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
963               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
964               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
965               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
966               Keywords.kw_from));
967 }
968 
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)969 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
970                                  const FormatToken *FormatTok) {
971   return FormatTok->Tok.isLiteral() ||
972          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
973          mustBeJSIdent(Keywords, FormatTok);
974 }
975 
976 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
977 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)978 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
979                            const FormatToken *FormatTok) {
980   return FormatTok->isOneOf(
981       tok::kw_return, Keywords.kw_yield,
982       // conditionals
983       tok::kw_if, tok::kw_else,
984       // loops
985       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
986       // switch/case
987       tok::kw_switch, tok::kw_case,
988       // exceptions
989       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
990       // declaration
991       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
992       Keywords.kw_async, Keywords.kw_function,
993       // import/export
994       Keywords.kw_import, tok::kw_export);
995 }
996 
997 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)998 static bool isC78Type(const FormatToken &Tok) {
999   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1000                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1001                      tok::identifier);
1002 }
1003 
1004 // This function checks whether a token starts the first parameter declaration
1005 // in a K&R C (aka C78) function definition, e.g.:
1006 //   int f(a, b)
1007 //   short a, b;
1008 //   {
1009 //      return a + b;
1010 //   }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1011 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1012                                const FormatToken *FuncName) {
1013   assert(Tok);
1014   assert(Next);
1015   assert(FuncName);
1016 
1017   if (FuncName->isNot(tok::identifier))
1018     return false;
1019 
1020   const FormatToken *Prev = FuncName->Previous;
1021   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1022     return false;
1023 
1024   if (!isC78Type(*Tok) &&
1025       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1026     return false;
1027 
1028   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1029     return false;
1030 
1031   Tok = Tok->Previous;
1032   if (!Tok || Tok->isNot(tok::r_paren))
1033     return false;
1034 
1035   Tok = Tok->Previous;
1036   if (!Tok || Tok->isNot(tok::identifier))
1037     return false;
1038 
1039   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1040 }
1041 
1042 // readTokenWithJavaScriptASI reads the next token and terminates the current
1043 // line if JavaScript Automatic Semicolon Insertion must
1044 // happen between the current token and the next token.
1045 //
1046 // This method is conservative - it cannot cover all edge cases of JavaScript,
1047 // but only aims to correctly handle certain well known cases. It *must not*
1048 // return true in speculative cases.
readTokenWithJavaScriptASI()1049 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1050   FormatToken *Previous = FormatTok;
1051   readToken();
1052   FormatToken *Next = FormatTok;
1053 
1054   bool IsOnSameLine =
1055       CommentsBeforeNextToken.empty()
1056           ? Next->NewlinesBefore == 0
1057           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1058   if (IsOnSameLine)
1059     return;
1060 
1061   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1062   bool PreviousStartsTemplateExpr =
1063       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1064   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1065     // If the line contains an '@' sign, the previous token might be an
1066     // annotation, which can precede another identifier/value.
1067     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1068                               [](UnwrappedLineNode &LineNode) {
1069                                 return LineNode.Tok->is(tok::at);
1070                               }) != Line->Tokens.end();
1071     if (HasAt)
1072       return;
1073   }
1074   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1075     return addUnwrappedLine();
1076   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1077   bool NextEndsTemplateExpr =
1078       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1079   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1080       (PreviousMustBeValue ||
1081        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1082                          tok::minusminus)))
1083     return addUnwrappedLine();
1084   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1085       isJSDeclOrStmt(Keywords, Next))
1086     return addUnwrappedLine();
1087 }
1088 
// Parses one structural element (statement, declaration, label, block, ...)
// starting at the current token, which must not be an opening brace.
//
// The function works in two phases:
//  1. A switch on the leading token dispatches keywords that have a dedicated
//     parser (namespace, if, for, switch, ...). Cases that `return` have fully
//     handled the element; cases that `break` fall through to phase 2.
//  2. A loop consumes the remaining tokens one construct at a time until the
//     element is terminated (e.g. by ';', '}' or a trailing block) or eof() is
//     reached.
//
// \p IsTopLevel is only consulted when deciding whether a parenthesized list
// is followed by a K&R C style parameter declaration.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: "include" optionally followed by a string literal forms a line
  // of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the token that starts the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are consumed as ordinary tokens rather
    // than parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; anything else is parsed as a
    // regular declaration below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        } else {
          unsigned AddLevels =
              Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import ["public"] "file" [";"]
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // "signals:" / "slots:" style section markers (and their Q_ variants).
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the end of the structural element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      break;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // "typedef NS_ENUM(...)" and friends are parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      // AllTokens[Position] is handed to isC78ParameterDecl as the token that
      // follows FormatTok.
      const unsigned Position = Tokens->getPosition() + 1;
      assert(Position < AllTokens.size());
      if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parameter list and a
      // braced body, the latter being parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then restore the token source position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only an identifier that is the first token of the line (optionally
      // preceded by a comment) can be a label or a free-standing macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or at
        // least five characters long, followed by a line break and a token
        // that may start a line, is treated as a macro on its own line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: "= <" opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1584 
tryToParsePropertyAccessor()1585 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1586   assert(FormatTok->is(tok::l_brace));
1587   if (!Style.isCSharp())
1588     return false;
1589   // See if it's a property accessor.
1590   if (FormatTok->Previous->isNot(tok::identifier))
1591     return false;
1592 
1593   // See if we are inside a property accessor.
1594   //
1595   // Record the current tokenPosition so that we can advance and
1596   // reset the current token. `Next` is not set yet so we need
1597   // another way to advance along the token stream.
1598   unsigned int StoredPosition = Tokens->getPosition();
1599   FormatToken *Tok = Tokens->getNextToken();
1600 
1601   // A trivial property accessor is of the form:
1602   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1603   // Track these as they do not require line breaks to be introduced.
1604   bool HasGetOrSet = false;
1605   bool IsTrivialPropertyAccessor = true;
1606   while (!eof()) {
1607     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1608                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1609                      Keywords.kw_set)) {
1610       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1611         HasGetOrSet = true;
1612       Tok = Tokens->getNextToken();
1613       continue;
1614     }
1615     if (Tok->isNot(tok::r_brace))
1616       IsTrivialPropertyAccessor = false;
1617     break;
1618   }
1619 
1620   if (!HasGetOrSet) {
1621     Tokens->setPosition(StoredPosition);
1622     return false;
1623   }
1624 
1625   // Try to parse the property accessor:
1626   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1627   Tokens->setPosition(StoredPosition);
1628   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1629     addUnwrappedLine();
1630   nextToken();
1631   do {
1632     switch (FormatTok->Tok.getKind()) {
1633     case tok::r_brace:
1634       nextToken();
1635       if (FormatTok->is(tok::equal)) {
1636         while (!eof() && FormatTok->isNot(tok::semi))
1637           nextToken();
1638         nextToken();
1639       }
1640       addUnwrappedLine();
1641       return true;
1642     case tok::l_brace:
1643       ++Line->Level;
1644       parseBlock(/*MustBeDeclaration=*/true);
1645       addUnwrappedLine();
1646       --Line->Level;
1647       break;
1648     case tok::equal:
1649       if (FormatTok->is(TT_FatArrow)) {
1650         ++Line->Level;
1651         do {
1652           nextToken();
1653         } while (!eof() && FormatTok->isNot(tok::semi));
1654         nextToken();
1655         addUnwrappedLine();
1656         --Line->Level;
1657         break;
1658       }
1659       nextToken();
1660       break;
1661     default:
1662       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1663           !IsTrivialPropertyAccessor) {
1664         // Non-trivial get/set needs to be on its own line.
1665         addUnwrappedLine();
1666       }
1667       nextToken();
1668     }
1669   } while (!eof());
1670 
1671   // Unreachable for well-formed code (paired '{' and '}').
1672   return true;
1673 }
1674 
// Tries to parse a C++ lambda expression starting at the current '['.
//
// For non-C++ styles this only consumes one token and returns false. It also
// returns false when tryToParseLambdaIntroducer() rejects the '['. On every
// other path it returns true: either a '{' was reached, in which case the '['
// and '{' are typed as TT_LambdaLSquare / TT_LambdaLBrace and the body is
// parsed as a child block, or a token that cannot be part of a lambda
// declarator was hit, in which case parsing stops and the '[' is not marked
// as a lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so it can be typed once the lambda body is found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Set once a '->' has been consumed; operator tokens are only accepted
  // after that point (see the comment on the operator cases below).
  bool SeenArrow = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    // Not reached from here: the loop condition already stops on '{'.
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_class:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_template:
    case tok::kw_typename:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow) {
        nextToken();
        break;
      }
      // An operator before any '->': give up without marking a lambda.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token ends the attempt; the '[' keeps its current type.
      return true;
    }
  }
  // Reached the '{' of the lambda body: type both brackets and parse the body.
  FormatTok->setType(TT_LambdaLBrace);
  LSquare.setType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
1770 
tryToParseLambdaIntroducer()1771 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1772   const FormatToken *Previous = FormatTok->Previous;
1773   if (Previous &&
1774       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1775                          tok::kw_delete, tok::l_square) ||
1776        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1777        Previous->isSimpleTypeSpecifier())) {
1778     nextToken();
1779     return false;
1780   }
1781   nextToken();
1782   if (FormatTok->is(tok::l_square)) {
1783     return false;
1784   }
1785   parseSquare(/*LambdaIntroducer=*/true);
1786   return true;
1787 }
1788 
tryToParseJSFunction()1789 void UnwrappedLineParser::tryToParseJSFunction() {
1790   assert(FormatTok->is(Keywords.kw_function) ||
1791          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1792   if (FormatTok->is(Keywords.kw_async))
1793     nextToken();
1794   // Consume "function".
1795   nextToken();
1796 
1797   // Consume * (generator function). Treat it like C++'s overloaded operators.
1798   if (FormatTok->is(tok::star)) {
1799     FormatTok->setType(TT_OverloadedOperator);
1800     nextToken();
1801   }
1802 
1803   // Consume function name.
1804   if (FormatTok->is(tok::identifier))
1805     nextToken();
1806 
1807   if (FormatTok->isNot(tok::l_paren))
1808     return;
1809 
1810   // Parse formal parameter list.
1811   parseParens();
1812 
1813   if (FormatTok->is(tok::colon)) {
1814     // Parse a type definition.
1815     nextToken();
1816 
1817     // Eat the type declaration. For braced inline object types, balance braces,
1818     // otherwise just parse until finding an l_brace for the function body.
1819     if (FormatTok->is(tok::l_brace))
1820       tryToParseBracedList();
1821     else
1822       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1823         nextToken();
1824   }
1825 
1826   if (FormatTok->is(tok::semi))
1827     return;
1828 
1829   parseChildBlock();
1830 }
1831 
tryToParseBracedList()1832 bool UnwrappedLineParser::tryToParseBracedList() {
1833   if (FormatTok->is(BK_Unknown))
1834     calculateBraceTypes();
1835   assert(FormatTok->isNot(BK_Unknown));
1836   if (FormatTok->is(BK_Block))
1837     return false;
1838   nextToken();
1839   parseBracedList();
1840   return true;
1841 }
1842 
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1843 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1844                                           bool IsEnum,
1845                                           tok::TokenKind ClosingBraceKind) {
1846   bool HasError = false;
1847 
1848   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1849   // replace this by using parseAssignmentExpression() inside.
1850   do {
1851     if (Style.isCSharp()) {
1852       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1853       // TT_FatArrow. They always start an expression or a child block if
1854       // followed by a curly brace.
1855       if (FormatTok->is(TT_FatArrow)) {
1856         nextToken();
1857         if (FormatTok->is(tok::l_brace)) {
1858           // C# may break after => if the next character is a newline.
1859           if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1860             // calling `addUnwrappedLine()` here causes odd parsing errors.
1861             FormatTok->MustBreakBefore = true;
1862           }
1863           parseChildBlock();
1864           continue;
1865         }
1866       }
1867     }
1868     if (Style.Language == FormatStyle::LK_JavaScript) {
1869       if (FormatTok->is(Keywords.kw_function) ||
1870           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1871         tryToParseJSFunction();
1872         continue;
1873       }
1874       if (FormatTok->is(TT_FatArrow)) {
1875         nextToken();
1876         // Fat arrows can be followed by simple expressions or by child blocks
1877         // in curly braces.
1878         if (FormatTok->is(tok::l_brace)) {
1879           parseChildBlock();
1880           continue;
1881         }
1882       }
1883       if (FormatTok->is(tok::l_brace)) {
1884         // Could be a method inside of a braced list `{a() { return 1; }}`.
1885         if (tryToParseBracedList())
1886           continue;
1887         parseChildBlock();
1888       }
1889     }
1890     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1891       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1892         addUnwrappedLine();
1893       nextToken();
1894       return !HasError;
1895     }
1896     switch (FormatTok->Tok.getKind()) {
1897     case tok::caret:
1898       nextToken();
1899       if (FormatTok->is(tok::l_brace)) {
1900         parseChildBlock();
1901       }
1902       break;
1903     case tok::l_square:
1904       if (Style.isCSharp())
1905         parseSquare();
1906       else
1907         tryToParseLambda();
1908       break;
1909     case tok::l_paren:
1910       parseParens();
1911       // JavaScript can just have free standing methods and getters/setters in
1912       // object literals. Detect them by a "{" following ")".
1913       if (Style.Language == FormatStyle::LK_JavaScript) {
1914         if (FormatTok->is(tok::l_brace))
1915           parseChildBlock();
1916         break;
1917       }
1918       break;
1919     case tok::l_brace:
1920       // Assume there are no blocks inside a braced init list apart
1921       // from the ones we explicitly parse out (like lambdas).
1922       FormatTok->setBlockKind(BK_BracedInit);
1923       nextToken();
1924       parseBracedList();
1925       break;
1926     case tok::less:
1927       if (Style.Language == FormatStyle::LK_Proto) {
1928         nextToken();
1929         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1930                         /*ClosingBraceKind=*/tok::greater);
1931       } else {
1932         nextToken();
1933       }
1934       break;
1935     case tok::semi:
1936       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1937       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1938       // used for error recovery if we have otherwise determined that this is
1939       // a braced list.
1940       if (Style.Language == FormatStyle::LK_JavaScript) {
1941         nextToken();
1942         break;
1943       }
1944       HasError = true;
1945       if (!ContinueOnSemicolons)
1946         return !HasError;
1947       nextToken();
1948       break;
1949     case tok::comma:
1950       nextToken();
1951       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1952         addUnwrappedLine();
1953       break;
1954     default:
1955       nextToken();
1956       break;
1957     }
1958   } while (!eof());
1959   return false;
1960 }
1961 
// Parses a parenthesized token sequence. The current token must be '('; the
// function consumes through the matching ')'. It returns early, without
// consuming it, on a '}' and also stops at eof. Nested parentheses, lambdas,
// braced lists / child blocks and a few language-specific constructs are
// handed to their own parsers.
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      // In Java, a '{' right after a nested ")" is parsed as a child block.
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // '@' directly followed by '{' is parsed as a braced list.
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      // A C# fat arrow inside parentheses is parsed as a structural element.
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        parseStructuralElement();
      else
        nextToken();
      break;
    case tok::kw_class:
      // In JavaScript a `class` inside parentheses is parsed as an
      // expression.
      if (Style.Language == FormatStyle::LK_JavaScript)
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      // JavaScript `function` / `async function` expressions.
      if (Style.Language == FormatStyle::LK_JavaScript &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
2018 
// Parses a square-bracketed token sequence through the matching ']'.
//
// LambdaIntroducer: when false, the current token must be '[' and the
//   sequence is first offered to tryToParseLambda(); if that returns true
//   nothing else is done here. When true, that attempt is skipped and parsing
//   starts directly at the current token.
//
// Returns early, without consuming it, on a '}' and also stops at eof.
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
      // '@' directly followed by '{' is parsed as a braced list.
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
2057 
parseIfThenElse()2058 void UnwrappedLineParser::parseIfThenElse() {
2059   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2060   nextToken();
2061   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2062     nextToken();
2063   if (FormatTok->Tok.is(tok::l_paren))
2064     parseParens();
2065   // handle [[likely]] / [[unlikely]]
2066   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2067     parseSquare();
2068   bool NeedsUnwrappedLine = false;
2069   if (FormatTok->Tok.is(tok::l_brace)) {
2070     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2071     parseBlock(/*MustBeDeclaration=*/false);
2072     if (Style.BraceWrapping.BeforeElse)
2073       addUnwrappedLine();
2074     else
2075       NeedsUnwrappedLine = true;
2076   } else {
2077     addUnwrappedLine();
2078     ++Line->Level;
2079     parseStructuralElement();
2080     --Line->Level;
2081   }
2082   if (FormatTok->Tok.is(tok::kw_else)) {
2083     nextToken();
2084     // handle [[likely]] / [[unlikely]]
2085     if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2086       parseSquare();
2087     if (FormatTok->Tok.is(tok::l_brace)) {
2088       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2089       parseBlock(/*MustBeDeclaration=*/false);
2090       addUnwrappedLine();
2091     } else if (FormatTok->Tok.is(tok::kw_if)) {
2092       FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
2093       bool PrecededByComment = Previous->is(tok::comment);
2094       if (PrecededByComment) {
2095         addUnwrappedLine();
2096         ++Line->Level;
2097       }
2098       parseIfThenElse();
2099       if (PrecededByComment)
2100         --Line->Level;
2101     } else {
2102       addUnwrappedLine();
2103       ++Line->Level;
2104       parseStructuralElement();
2105       if (FormatTok->is(tok::eof))
2106         addUnwrappedLine();
2107       --Line->Level;
2108     }
2109   } else if (NeedsUnwrappedLine) {
2110     addUnwrappedLine();
2111   }
2112 }
2113 
parseTryCatch()2114 void UnwrappedLineParser::parseTryCatch() {
2115   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2116   nextToken();
2117   bool NeedsUnwrappedLine = false;
2118   if (FormatTok->is(tok::colon)) {
2119     // We are in a function try block, what comes is an initializer list.
2120     nextToken();
2121 
2122     // In case identifiers were removed by clang-tidy, what might follow is
2123     // multiple commas in sequence - before the first identifier.
2124     while (FormatTok->is(tok::comma))
2125       nextToken();
2126 
2127     while (FormatTok->is(tok::identifier)) {
2128       nextToken();
2129       if (FormatTok->is(tok::l_paren))
2130         parseParens();
2131       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2132           FormatTok->is(tok::l_brace)) {
2133         do {
2134           nextToken();
2135         } while (!FormatTok->is(tok::r_brace));
2136         nextToken();
2137       }
2138 
2139       // In case identifiers were removed by clang-tidy, what might follow is
2140       // multiple commas in sequence - after the first identifier.
2141       while (FormatTok->is(tok::comma))
2142         nextToken();
2143     }
2144   }
2145   // Parse try with resource.
2146   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2147     parseParens();
2148   }
2149   if (FormatTok->is(tok::l_brace)) {
2150     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2151     parseBlock(/*MustBeDeclaration=*/false);
2152     if (Style.BraceWrapping.BeforeCatch) {
2153       addUnwrappedLine();
2154     } else {
2155       NeedsUnwrappedLine = true;
2156     }
2157   } else if (!FormatTok->is(tok::kw_catch)) {
2158     // The C++ standard requires a compound-statement after a try.
2159     // If there's none, we try to assume there's a structuralElement
2160     // and try to continue.
2161     addUnwrappedLine();
2162     ++Line->Level;
2163     parseStructuralElement();
2164     --Line->Level;
2165   }
2166   while (1) {
2167     if (FormatTok->is(tok::at))
2168       nextToken();
2169     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2170                              tok::kw___finally) ||
2171           ((Style.Language == FormatStyle::LK_Java ||
2172             Style.Language == FormatStyle::LK_JavaScript) &&
2173            FormatTok->is(Keywords.kw_finally)) ||
2174           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2175            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2176       break;
2177     nextToken();
2178     while (FormatTok->isNot(tok::l_brace)) {
2179       if (FormatTok->is(tok::l_paren)) {
2180         parseParens();
2181         continue;
2182       }
2183       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2184         return;
2185       nextToken();
2186     }
2187     NeedsUnwrappedLine = false;
2188     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2189     parseBlock(/*MustBeDeclaration=*/false);
2190     if (Style.BraceWrapping.BeforeCatch)
2191       addUnwrappedLine();
2192     else
2193       NeedsUnwrappedLine = true;
2194   }
2195   if (NeedsUnwrappedLine)
2196     addUnwrappedLine();
2197 }
2198 
parseNamespace()2199 void UnwrappedLineParser::parseNamespace() {
2200   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2201          "'namespace' expected");
2202 
2203   const FormatToken &InitialToken = *FormatTok;
2204   nextToken();
2205   if (InitialToken.is(TT_NamespaceMacro)) {
2206     parseParens();
2207   } else {
2208     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2209                               tok::l_square)) {
2210       if (FormatTok->is(tok::l_square))
2211         parseSquare();
2212       else
2213         nextToken();
2214     }
2215   }
2216   if (FormatTok->Tok.is(tok::l_brace)) {
2217     if (ShouldBreakBeforeBrace(Style, InitialToken))
2218       addUnwrappedLine();
2219 
2220     unsigned AddLevels =
2221         Style.NamespaceIndentation == FormatStyle::NI_All ||
2222                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2223                  DeclarationScopeStack.size() > 1)
2224             ? 1u
2225             : 0u;
2226     bool ManageWhitesmithsBraces =
2227         AddLevels == 0u &&
2228         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2229 
2230     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2231     // the whole block.
2232     if (ManageWhitesmithsBraces)
2233       ++Line->Level;
2234 
2235     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2236                /*MunchSemi=*/true,
2237                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2238 
2239     // Munch the semicolon after a namespace. This is more common than one would
2240     // think. Putting the semicolon into its own line is very ugly.
2241     if (FormatTok->Tok.is(tok::semi))
2242       nextToken();
2243 
2244     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2245 
2246     if (ManageWhitesmithsBraces)
2247       --Line->Level;
2248   }
2249   // FIXME: Add error handling.
2250 }
2251 
parseNew()2252 void UnwrappedLineParser::parseNew() {
2253   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2254   nextToken();
2255 
2256   if (Style.isCSharp()) {
2257     do {
2258       if (FormatTok->is(tok::l_brace))
2259         parseBracedList();
2260 
2261       if (FormatTok->isOneOf(tok::semi, tok::comma))
2262         return;
2263 
2264       nextToken();
2265     } while (!eof());
2266   }
2267 
2268   if (Style.Language != FormatStyle::LK_Java)
2269     return;
2270 
2271   // In Java, we can parse everything up to the parens, which aren't optional.
2272   do {
2273     // There should not be a ;, { or } before the new's open paren.
2274     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2275       return;
2276 
2277     // Consume the parens.
2278     if (FormatTok->is(tok::l_paren)) {
2279       parseParens();
2280 
2281       // If there is a class body of an anonymous class, consume that as child.
2282       if (FormatTok->is(tok::l_brace))
2283         parseChildBlock();
2284       return;
2285     }
2286     nextToken();
2287   } while (!eof());
2288 }
2289 
parseForOrWhileLoop()2290 void UnwrappedLineParser::parseForOrWhileLoop() {
2291   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2292          "'for', 'while' or foreach macro expected");
2293   nextToken();
2294   // JS' for await ( ...
2295   if (Style.Language == FormatStyle::LK_JavaScript &&
2296       FormatTok->is(Keywords.kw_await))
2297     nextToken();
2298   if (FormatTok->Tok.is(tok::l_paren))
2299     parseParens();
2300   if (FormatTok->Tok.is(tok::l_brace)) {
2301     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2302     parseBlock(/*MustBeDeclaration=*/false);
2303     addUnwrappedLine();
2304   } else {
2305     addUnwrappedLine();
2306     ++Line->Level;
2307     parseStructuralElement();
2308     --Line->Level;
2309   }
2310 }
2311 
parseDoWhile()2312 void UnwrappedLineParser::parseDoWhile() {
2313   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2314   nextToken();
2315   if (FormatTok->Tok.is(tok::l_brace)) {
2316     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2317     parseBlock(/*MustBeDeclaration=*/false);
2318     if (Style.BraceWrapping.BeforeWhile)
2319       addUnwrappedLine();
2320   } else {
2321     addUnwrappedLine();
2322     ++Line->Level;
2323     parseStructuralElement();
2324     --Line->Level;
2325   }
2326 
2327   // FIXME: Add error handling.
2328   if (!FormatTok->Tok.is(tok::kw_while)) {
2329     addUnwrappedLine();
2330     return;
2331   }
2332 
2333   // If in Whitesmiths mode, the line with the while() needs to be indented
2334   // to the same level as the block.
2335   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2336     ++Line->Level;
2337 
2338   nextToken();
2339   parseStructuralElement();
2340 }
2341 
parseLabel(bool LeftAlignLabel)2342 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2343   nextToken();
2344   unsigned OldLineLevel = Line->Level;
2345   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2346     --Line->Level;
2347   if (LeftAlignLabel)
2348     Line->Level = 0;
2349 
2350   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2351       FormatTok->Tok.is(tok::l_brace)) {
2352 
2353     CompoundStatementIndenter Indenter(this, Line->Level,
2354                                        Style.BraceWrapping.AfterCaseLabel,
2355                                        Style.BraceWrapping.IndentBraces);
2356     parseBlock(/*MustBeDeclaration=*/false);
2357     if (FormatTok->Tok.is(tok::kw_break)) {
2358       if (Style.BraceWrapping.AfterControlStatement ==
2359           FormatStyle::BWACS_Always) {
2360         addUnwrappedLine();
2361         if (!Style.IndentCaseBlocks &&
2362             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2363           Line->Level++;
2364         }
2365       }
2366       parseStructuralElement();
2367     }
2368     addUnwrappedLine();
2369   } else {
2370     if (FormatTok->is(tok::semi))
2371       nextToken();
2372     addUnwrappedLine();
2373   }
2374   Line->Level = OldLineLevel;
2375   if (FormatTok->isNot(tok::l_brace)) {
2376     parseStructuralElement();
2377     addUnwrappedLine();
2378   }
2379 }
2380 
parseCaseLabel()2381 void UnwrappedLineParser::parseCaseLabel() {
2382   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2383 
2384   // FIXME: fix handling of complex expressions here.
2385   do {
2386     nextToken();
2387   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2388   parseLabel();
2389 }
2390 
parseSwitch()2391 void UnwrappedLineParser::parseSwitch() {
2392   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2393   nextToken();
2394   if (FormatTok->Tok.is(tok::l_paren))
2395     parseParens();
2396   if (FormatTok->Tok.is(tok::l_brace)) {
2397     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2398     parseBlock(/*MustBeDeclaration=*/false);
2399     addUnwrappedLine();
2400   } else {
2401     addUnwrappedLine();
2402     ++Line->Level;
2403     parseStructuralElement();
2404     --Line->Level;
2405   }
2406 }
2407 
parseAccessSpecifier()2408 void UnwrappedLineParser::parseAccessSpecifier() {
2409   nextToken();
2410   // Understand Qt's slots.
2411   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2412     nextToken();
2413   // Otherwise, we don't know what it is, and we'd better keep the next token.
2414   if (FormatTok->Tok.is(tok::colon))
2415     nextToken();
2416   addUnwrappedLine();
2417 }
2418 
parseConcept()2419 void UnwrappedLineParser::parseConcept() {
2420   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2421   nextToken();
2422   if (!FormatTok->Tok.is(tok::identifier))
2423     return;
2424   nextToken();
2425   if (!FormatTok->Tok.is(tok::equal))
2426     return;
2427   nextToken();
2428   if (FormatTok->Tok.is(tok::kw_requires)) {
2429     nextToken();
2430     parseRequiresExpression(Line->Level);
2431   } else {
2432     parseConstraintExpression(Line->Level);
2433   }
2434 }
2435 
parseRequiresExpression(unsigned int OriginalLevel)2436 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2437   // requires (R range)
2438   if (FormatTok->Tok.is(tok::l_paren)) {
2439     parseParens();
2440     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2441       addUnwrappedLine();
2442       --Line->Level;
2443     }
2444   }
2445 
2446   if (FormatTok->Tok.is(tok::l_brace)) {
2447     if (Style.BraceWrapping.AfterFunction)
2448       addUnwrappedLine();
2449     FormatTok->setType(TT_FunctionLBrace);
2450     parseBlock(/*MustBeDeclaration=*/false);
2451     addUnwrappedLine();
2452   } else {
2453     parseConstraintExpression(OriginalLevel);
2454   }
2455 }
2456 
// Parses a constraint expression: a sequence of terms joined by `&&` / `||`,
// where each term starts with an identifier, `requires` or `::`.
//
// OriginalLevel: line level to compare against; when Style.IndentRequires is
//   set and the current level differs from it at the end of the expression,
//   the level is reduced by one.
//
// Returns as soon as a ':' is found after a term, and stops after the first
// term that is not followed by `&&` or `||`.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the rest of a (possibly qualified, templated) name.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        // Template arguments are parsed as a list that is closed by '>'.
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::l_brace)) {
      // A braced body is typed like a function body brace.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock(/*MustBeDeclaration=*/false);
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // End of the expression: close the line unless the previous token is
      // an identifier, `requires` or `::`, then undo the extra indentation.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      // `&&` / `||` joining two constraints.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    nextToken();
  }
}
2514 
parseRequires()2515 void UnwrappedLineParser::parseRequires() {
2516   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2517 
2518   unsigned OriginalLevel = Line->Level;
2519   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2520     addUnwrappedLine();
2521     if (Style.IndentRequires) {
2522       Line->Level++;
2523     }
2524   }
2525   nextToken();
2526 
2527   parseRequiresExpression(OriginalLevel);
2528 }
2529 
parseEnum()2530 bool UnwrappedLineParser::parseEnum() {
2531   // Won't be 'enum' for NS_ENUMs.
2532   if (FormatTok->Tok.is(tok::kw_enum))
2533     nextToken();
2534 
2535   const FormatToken &InitialToken = *FormatTok;
2536 
2537   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2538   // declarations. An "enum" keyword followed by a colon would be a syntax
2539   // error and thus assume it is just an identifier.
2540   if (Style.Language == FormatStyle::LK_JavaScript &&
2541       FormatTok->isOneOf(tok::colon, tok::question))
2542     return false;
2543 
2544   // In protobuf, "enum" can be used as a field name.
2545   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2546     return false;
2547 
2548   // Eat up enum class ...
2549   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2550     nextToken();
2551 
2552   while (FormatTok->Tok.getIdentifierInfo() ||
2553          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2554                             tok::greater, tok::comma, tok::question)) {
2555     nextToken();
2556     // We can have macros or attributes in between 'enum' and the enum name.
2557     if (FormatTok->is(tok::l_paren))
2558       parseParens();
2559     if (FormatTok->is(tok::identifier)) {
2560       nextToken();
2561       // If there are two identifiers in a row, this is likely an elaborate
2562       // return type. In Java, this can be "implements", etc.
2563       if (Style.isCpp() && FormatTok->is(tok::identifier))
2564         return false;
2565     }
2566   }
2567 
2568   // Just a declaration or something is wrong.
2569   if (FormatTok->isNot(tok::l_brace))
2570     return true;
2571   FormatTok->setBlockKind(BK_Block);
2572 
2573   if (Style.Language == FormatStyle::LK_Java) {
2574     // Java enums are different.
2575     parseJavaEnumBody();
2576     return true;
2577   }
2578   if (Style.Language == FormatStyle::LK_Proto) {
2579     parseBlock(/*MustBeDeclaration=*/true);
2580     return true;
2581   }
2582 
2583   if (!Style.AllowShortEnumsOnASingleLine &&
2584       ShouldBreakBeforeBrace(Style, InitialToken))
2585     addUnwrappedLine();
2586   // Parse enum body.
2587   nextToken();
2588   if (!Style.AllowShortEnumsOnASingleLine) {
2589     addUnwrappedLine();
2590     Line->Level += 1;
2591   }
2592   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2593                                    /*IsEnum=*/true);
2594   if (!Style.AllowShortEnumsOnASingleLine)
2595     Line->Level -= 1;
2596   if (HasError) {
2597     if (FormatTok->is(tok::semi))
2598       nextToken();
2599     addUnwrappedLine();
2600   }
2601   return true;
2602 
2603   // There is no addUnwrappedLine() here so that we fall through to parsing a
2604   // structural element afterwards. Thus, in "enum A {} n, m;",
2605   // "} n, m;" will end up in one unwrapped line.
2606 }
2607 
parseStructLike()2608 bool UnwrappedLineParser::parseStructLike() {
2609   // parseRecord falls through and does not yet add an unwrapped line as a
2610   // record declaration or definition can start a structural element.
2611   parseRecord();
2612   // This does not apply to Java, JavaScript and C#.
2613   if (Style.Language == FormatStyle::LK_Java ||
2614       Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2615     if (FormatTok->is(tok::semi))
2616       nextToken();
2617     addUnwrappedLine();
2618     return true;
2619   }
2620   return false;
2621 }
2622 
2623 namespace {
2624 // A class used to set and restore the Token position when peeking
2625 // ahead in the token source.
2626 class ScopedTokenPosition {
2627   unsigned StoredPosition;
2628   FormatTokenSource *Tokens;
2629 
2630 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2631   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2632     assert(Tokens && "Tokens expected to not be null");
2633     StoredPosition = Tokens->getPosition();
2634   }
2635 
~ScopedTokenPosition()2636   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2637 };
2638 } // namespace
2639 
2640 // Look to see if we have [[ by looking ahead, if
2641 // its not then rewind to the original position.
tryToParseSimpleAttribute()2642 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2643   ScopedTokenPosition AutoPosition(Tokens);
2644   FormatToken *Tok = Tokens->getNextToken();
2645   // We already read the first [ check for the second.
2646   if (Tok && !Tok->is(tok::l_square)) {
2647     return false;
2648   }
2649   // Double check that the attribute is just something
2650   // fairly simple.
2651   while (Tok) {
2652     if (Tok->is(tok::r_square)) {
2653       break;
2654     }
2655     Tok = Tokens->getNextToken();
2656   }
2657   Tok = Tokens->getNextToken();
2658   if (Tok && !Tok->is(tok::r_square)) {
2659     return false;
2660   }
2661   Tok = Tokens->getNextToken();
2662   if (Tok && Tok->is(tok::semi)) {
2663     return false;
2664   }
2665   return true;
2666 }
2667 
parseJavaEnumBody()2668 void UnwrappedLineParser::parseJavaEnumBody() {
2669   // Determine whether the enum is simple, i.e. does not have a semicolon or
2670   // constants with class bodies. Simple enums can be formatted like braced
2671   // lists, contracted to a single line, etc.
2672   unsigned StoredPosition = Tokens->getPosition();
2673   bool IsSimple = true;
2674   FormatToken *Tok = Tokens->getNextToken();
2675   while (Tok) {
2676     if (Tok->is(tok::r_brace))
2677       break;
2678     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2679       IsSimple = false;
2680       break;
2681     }
2682     // FIXME: This will also mark enums with braces in the arguments to enum
2683     // constants as "not simple". This is probably fine in practice, though.
2684     Tok = Tokens->getNextToken();
2685   }
2686   FormatTok = Tokens->setPosition(StoredPosition);
2687 
2688   if (IsSimple) {
2689     nextToken();
2690     parseBracedList();
2691     addUnwrappedLine();
2692     return;
2693   }
2694 
2695   // Parse the body of a more complex enum.
2696   // First add a line for everything up to the "{".
2697   nextToken();
2698   addUnwrappedLine();
2699   ++Line->Level;
2700 
2701   // Parse the enum constants.
2702   while (FormatTok) {
2703     if (FormatTok->is(tok::l_brace)) {
2704       // Parse the constant's class body.
2705       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2706                  /*MunchSemi=*/false);
2707     } else if (FormatTok->is(tok::l_paren)) {
2708       parseParens();
2709     } else if (FormatTok->is(tok::comma)) {
2710       nextToken();
2711       addUnwrappedLine();
2712     } else if (FormatTok->is(tok::semi)) {
2713       nextToken();
2714       addUnwrappedLine();
2715       break;
2716     } else if (FormatTok->is(tok::r_brace)) {
2717       addUnwrappedLine();
2718       break;
2719     } else {
2720       nextToken();
2721     }
2722   }
2723 
2724   // Parse the class body after the enum's ";" if any.
2725   parseLevel(/*HasOpeningBrace=*/true);
2726   nextToken();
2727   --Line->Level;
2728   addUnwrappedLine();
2729 }
2730 
parseRecord(bool ParseAsExpr)2731 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2732   const FormatToken &InitialToken = *FormatTok;
2733   nextToken();
2734 
2735   // The actual identifier can be a nested name specifier, and in macros
2736   // it is often token-pasted.
2737   // An [[attribute]] can be before the identifier.
2738   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2739                             tok::kw___attribute, tok::kw___declspec,
2740                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2741          ((Style.Language == FormatStyle::LK_Java ||
2742            Style.Language == FormatStyle::LK_JavaScript) &&
2743           FormatTok->isOneOf(tok::period, tok::comma))) {
2744     if (Style.Language == FormatStyle::LK_JavaScript &&
2745         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2746       // JavaScript/TypeScript supports inline object types in
2747       // extends/implements positions:
2748       //     class Foo implements {bar: number} { }
2749       nextToken();
2750       if (FormatTok->is(tok::l_brace)) {
2751         tryToParseBracedList();
2752         continue;
2753       }
2754     }
2755     bool IsNonMacroIdentifier =
2756         FormatTok->is(tok::identifier) &&
2757         FormatTok->TokenText != FormatTok->TokenText.upper();
2758     nextToken();
2759     // We can have macros or attributes in between 'class' and the class name.
2760     if (!IsNonMacroIdentifier) {
2761       if (FormatTok->Tok.is(tok::l_paren)) {
2762         parseParens();
2763       } else if (FormatTok->is(TT_AttributeSquare)) {
2764         parseSquare();
2765         // Consume the closing TT_AttributeSquare.
2766         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2767           nextToken();
2768       }
2769     }
2770   }
2771 
2772   // Note that parsing away template declarations here leads to incorrectly
2773   // accepting function declarations as record declarations.
2774   // In general, we cannot solve this problem. Consider:
2775   // class A<int> B() {}
2776   // which can be a function definition or a class definition when B() is a
2777   // macro. If we find enough real-world cases where this is a problem, we
2778   // can parse for the 'template' keyword in the beginning of the statement,
2779   // and thus rule out the record production in case there is no template
2780   // (this would still leave us with an ambiguity between template function
2781   // and class declarations).
2782   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2783     while (!eof()) {
2784       if (FormatTok->is(tok::l_brace)) {
2785         calculateBraceTypes(/*ExpectClassBody=*/true);
2786         if (!tryToParseBracedList())
2787           break;
2788       }
2789       if (FormatTok->Tok.is(tok::semi))
2790         return;
2791       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2792         addUnwrappedLine();
2793         nextToken();
2794         parseCSharpGenericTypeConstraint();
2795         break;
2796       }
2797       nextToken();
2798     }
2799   }
2800   if (FormatTok->Tok.is(tok::l_brace)) {
2801     if (ParseAsExpr) {
2802       parseChildBlock();
2803     } else {
2804       if (ShouldBreakBeforeBrace(Style, InitialToken))
2805         addUnwrappedLine();
2806 
2807       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2808       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2809     }
2810   }
2811   // There is no addUnwrappedLine() here so that we fall through to parsing a
2812   // structural element afterwards. Thus, in "class A {} n, m;",
2813   // "} n, m;" will end up in one unwrapped line.
2814 }
2815 
parseObjCMethod()2816 void UnwrappedLineParser::parseObjCMethod() {
2817   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2818          "'(' or identifier expected.");
2819   do {
2820     if (FormatTok->Tok.is(tok::semi)) {
2821       nextToken();
2822       addUnwrappedLine();
2823       return;
2824     } else if (FormatTok->Tok.is(tok::l_brace)) {
2825       if (Style.BraceWrapping.AfterFunction)
2826         addUnwrappedLine();
2827       parseBlock(/*MustBeDeclaration=*/false);
2828       addUnwrappedLine();
2829       return;
2830     } else {
2831       nextToken();
2832     }
2833   } while (!eof());
2834 }
2835 
parseObjCProtocolList()2836 void UnwrappedLineParser::parseObjCProtocolList() {
2837   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2838   do {
2839     nextToken();
2840     // Early exit in case someone forgot a close angle.
2841     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2842         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2843       return;
2844   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2845   nextToken(); // Skip '>'.
2846 }
2847 
parseObjCUntilAtEnd()2848 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2849   do {
2850     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2851       nextToken();
2852       addUnwrappedLine();
2853       break;
2854     }
2855     if (FormatTok->is(tok::l_brace)) {
2856       parseBlock(/*MustBeDeclaration=*/false);
2857       // In ObjC interfaces, nothing should be following the "}".
2858       addUnwrappedLine();
2859     } else if (FormatTok->is(tok::r_brace)) {
2860       // Ignore stray "}". parseStructuralElement doesn't consume them.
2861       nextToken();
2862       addUnwrappedLine();
2863     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2864       nextToken();
2865       parseObjCMethod();
2866     } else {
2867       parseStructuralElement();
2868     }
2869   } while (!eof());
2870 }
2871 
parseObjCInterfaceOrImplementation()2872 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2873   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2874          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2875   nextToken();
2876   nextToken(); // interface name
2877 
2878   // @interface can be followed by a lightweight generic
2879   // specialization list, then either a base class or a category.
2880   if (FormatTok->Tok.is(tok::less)) {
2881     parseObjCLightweightGenerics();
2882   }
2883   if (FormatTok->Tok.is(tok::colon)) {
2884     nextToken();
2885     nextToken(); // base class name
2886     // The base class can also have lightweight generics applied to it.
2887     if (FormatTok->Tok.is(tok::less)) {
2888       parseObjCLightweightGenerics();
2889     }
2890   } else if (FormatTok->Tok.is(tok::l_paren))
2891     // Skip category, if present.
2892     parseParens();
2893 
2894   if (FormatTok->Tok.is(tok::less))
2895     parseObjCProtocolList();
2896 
2897   if (FormatTok->Tok.is(tok::l_brace)) {
2898     if (Style.BraceWrapping.AfterObjCDeclaration)
2899       addUnwrappedLine();
2900     parseBlock(/*MustBeDeclaration=*/true);
2901   }
2902 
2903   // With instance variables, this puts '}' on its own line.  Without instance
2904   // variables, this ends the @interface line.
2905   addUnwrappedLine();
2906 
2907   parseObjCUntilAtEnd();
2908 }
2909 
parseObjCLightweightGenerics()2910 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2911   assert(FormatTok->Tok.is(tok::less));
2912   // Unlike protocol lists, generic parameterizations support
2913   // nested angles:
2914   //
2915   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2916   //     NSObject <NSCopying, NSSecureCoding>
2917   //
2918   // so we need to count how many open angles we have left.
2919   unsigned NumOpenAngles = 1;
2920   do {
2921     nextToken();
2922     // Early exit in case someone forgot a close angle.
2923     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2924         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2925       break;
2926     if (FormatTok->Tok.is(tok::less))
2927       ++NumOpenAngles;
2928     else if (FormatTok->Tok.is(tok::greater)) {
2929       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2930       --NumOpenAngles;
2931     }
2932   } while (!eof() && NumOpenAngles != 0);
2933   nextToken(); // Skip '>'.
2934 }
2935 
2936 // Returns true for the declaration/definition form of @protocol,
2937 // false for the expression form.
parseObjCProtocol()2938 bool UnwrappedLineParser::parseObjCProtocol() {
2939   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2940   nextToken();
2941 
2942   if (FormatTok->is(tok::l_paren))
2943     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2944     return false;
2945 
2946   // The definition/declaration form,
2947   // @protocol Foo
2948   // - (int)someMethod;
2949   // @end
2950 
2951   nextToken(); // protocol name
2952 
2953   if (FormatTok->Tok.is(tok::less))
2954     parseObjCProtocolList();
2955 
2956   // Check for protocol declaration.
2957   if (FormatTok->Tok.is(tok::semi)) {
2958     nextToken();
2959     addUnwrappedLine();
2960     return true;
2961   }
2962 
2963   addUnwrappedLine();
2964   parseObjCUntilAtEnd();
2965   return true;
2966 }
2967 
parseJavaScriptEs6ImportExport()2968 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2969   bool IsImport = FormatTok->is(Keywords.kw_import);
2970   assert(IsImport || FormatTok->is(tok::kw_export));
2971   nextToken();
2972 
2973   // Consume the "default" in "export default class/function".
2974   if (FormatTok->is(tok::kw_default))
2975     nextToken();
2976 
2977   // Consume "async function", "function" and "default function", so that these
2978   // get parsed as free-standing JS functions, i.e. do not require a trailing
2979   // semicolon.
2980   if (FormatTok->is(Keywords.kw_async))
2981     nextToken();
2982   if (FormatTok->is(Keywords.kw_function)) {
2983     nextToken();
2984     return;
2985   }
2986 
2987   // For imports, `export *`, `export {...}`, consume the rest of the line up
2988   // to the terminating `;`. For everything else, just return and continue
2989   // parsing the structural element, i.e. the declaration or expression for
2990   // `export default`.
2991   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2992       !FormatTok->isStringLiteral())
2993     return;
2994 
2995   while (!eof()) {
2996     if (FormatTok->is(tok::semi))
2997       return;
2998     if (Line->Tokens.empty()) {
2999       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3000       // import statement should terminate.
3001       return;
3002     }
3003     if (FormatTok->is(tok::l_brace)) {
3004       FormatTok->setBlockKind(BK_Block);
3005       nextToken();
3006       parseBracedList();
3007     } else {
3008       nextToken();
3009     }
3010   }
3011 }
3012 
parseStatementMacro()3013 void UnwrappedLineParser::parseStatementMacro() {
3014   nextToken();
3015   if (FormatTok->is(tok::l_paren))
3016     parseParens();
3017   if (FormatTok->is(tok::semi))
3018     nextToken();
3019   addUnwrappedLine();
3020 }
3021 
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")3022 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3023                                                  StringRef Prefix = "") {
3024   llvm::dbgs() << Prefix << "Line(" << Line.Level
3025                << ", FSC=" << Line.FirstStartColumn << ")"
3026                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3027   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3028                                                     E = Line.Tokens.end();
3029        I != E; ++I) {
3030     llvm::dbgs() << I->Tok->Tok.getName() << "["
3031                  << "T=" << (unsigned)I->Tok->getType()
3032                  << ", OC=" << I->Tok->OriginalColumn << "] ";
3033   }
3034   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3035                                                     E = Line.Tokens.end();
3036        I != E; ++I) {
3037     const UnwrappedLineNode &Node = *I;
3038     for (SmallVectorImpl<UnwrappedLine>::const_iterator
3039              I = Node.Children.begin(),
3040              E = Node.Children.end();
3041          I != E; ++I) {
3042       printDebugInfo(*I, "\nChild: ");
3043     }
3044   }
3045   llvm::dbgs() << "\n";
3046 }
3047 
addUnwrappedLine(LineLevel AdjustLevel)3048 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3049   if (Line->Tokens.empty())
3050     return;
3051   LLVM_DEBUG({
3052     if (CurrentLines == &Lines)
3053       printDebugInfo(*Line);
3054   });
3055 
3056   // If this line closes a block when in Whitesmiths mode, remember that
3057   // information so that the level can be decreased after the line is added.
3058   // This has to happen after the addition of the line since the line itself
3059   // needs to be indented.
3060   bool ClosesWhitesmithsBlock =
3061       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3062       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3063 
3064   CurrentLines->push_back(std::move(*Line));
3065   Line->Tokens.clear();
3066   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3067   Line->FirstStartColumn = 0;
3068 
3069   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3070     --Line->Level;
3071   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3072     CurrentLines->append(
3073         std::make_move_iterator(PreprocessorDirectives.begin()),
3074         std::make_move_iterator(PreprocessorDirectives.end()));
3075     PreprocessorDirectives.clear();
3076   }
3077   // Disconnect the current token from the last token on the previous line.
3078   FormatTok->Previous = nullptr;
3079 }
3080 
eof() const3081 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3082 
isOnNewLine(const FormatToken & FormatTok)3083 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3084   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3085          FormatTok.NewlinesBefore > 0;
3086 }
3087 
3088 // Checks if \p FormatTok is a line comment that continues the line comment
3089 // section on \p Line.
3090 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)3091 continuesLineCommentSection(const FormatToken &FormatTok,
3092                             const UnwrappedLine &Line,
3093                             const llvm::Regex &CommentPragmasRegex) {
3094   if (Line.Tokens.empty())
3095     return false;
3096 
3097   StringRef IndentContent = FormatTok.TokenText;
3098   if (FormatTok.TokenText.startswith("//") ||
3099       FormatTok.TokenText.startswith("/*"))
3100     IndentContent = FormatTok.TokenText.substr(2);
3101   if (CommentPragmasRegex.match(IndentContent))
3102     return false;
3103 
3104   // If Line starts with a line comment, then FormatTok continues the comment
3105   // section if its original column is greater or equal to the original start
3106   // column of the line.
3107   //
3108   // Define the min column token of a line as follows: if a line ends in '{' or
3109   // contains a '{' followed by a line comment, then the min column token is
3110   // that '{'. Otherwise, the min column token of the line is the first token of
3111   // the line.
3112   //
3113   // If Line starts with a token other than a line comment, then FormatTok
3114   // continues the comment section if its original column is greater than the
3115   // original start column of the min column token of the line.
3116   //
3117   // For example, the second line comment continues the first in these cases:
3118   //
3119   // // first line
3120   // // second line
3121   //
3122   // and:
3123   //
3124   // // first line
3125   //  // second line
3126   //
3127   // and:
3128   //
3129   // int i; // first line
3130   //  // second line
3131   //
3132   // and:
3133   //
3134   // do { // first line
3135   //      // second line
3136   //   int i;
3137   // } while (true);
3138   //
3139   // and:
3140   //
3141   // enum {
3142   //   a, // first line
3143   //    // second line
3144   //   b
3145   // };
3146   //
3147   // The second line comment doesn't continue the first in these cases:
3148   //
3149   //   // first line
3150   //  // second line
3151   //
3152   // and:
3153   //
3154   // int i; // first line
3155   // // second line
3156   //
3157   // and:
3158   //
3159   // do { // first line
3160   //   // second line
3161   //   int i;
3162   // } while (true);
3163   //
3164   // and:
3165   //
3166   // enum {
3167   //   a, // first line
3168   //   // second line
3169   // };
3170   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3171 
3172   // Scan for '{//'. If found, use the column of '{' as a min column for line
3173   // comment section continuation.
3174   const FormatToken *PreviousToken = nullptr;
3175   for (const UnwrappedLineNode &Node : Line.Tokens) {
3176     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3177         isLineComment(*Node.Tok)) {
3178       MinColumnToken = PreviousToken;
3179       break;
3180     }
3181     PreviousToken = Node.Tok;
3182 
3183     // Grab the last newline preceding a token in this unwrapped line.
3184     if (Node.Tok->NewlinesBefore > 0) {
3185       MinColumnToken = Node.Tok;
3186     }
3187   }
3188   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3189     MinColumnToken = PreviousToken;
3190   }
3191 
3192   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3193                               MinColumnToken);
3194 }
3195 
flushComments(bool NewlineBeforeNext)3196 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3197   bool JustComments = Line->Tokens.empty();
3198   for (SmallVectorImpl<FormatToken *>::const_iterator
3199            I = CommentsBeforeNextToken.begin(),
3200            E = CommentsBeforeNextToken.end();
3201        I != E; ++I) {
3202     // Line comments that belong to the same line comment section are put on the
3203     // same line since later we might want to reflow content between them.
3204     // Additional fine-grained breaking of line comment sections is controlled
3205     // by the class BreakableLineCommentSection in case it is desirable to keep
3206     // several line comment sections in the same unwrapped line.
3207     //
3208     // FIXME: Consider putting separate line comment sections as children to the
3209     // unwrapped line instead.
3210     (*I)->ContinuesLineCommentSection =
3211         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3212     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3213       addUnwrappedLine();
3214     pushToken(*I);
3215   }
3216   if (NewlineBeforeNext && JustComments)
3217     addUnwrappedLine();
3218   CommentsBeforeNextToken.clear();
3219 }
3220 
nextToken(int LevelDifference)3221 void UnwrappedLineParser::nextToken(int LevelDifference) {
3222   if (eof())
3223     return;
3224   flushComments(isOnNewLine(*FormatTok));
3225   pushToken(FormatTok);
3226   FormatToken *Previous = FormatTok;
3227   if (Style.Language != FormatStyle::LK_JavaScript)
3228     readToken(LevelDifference);
3229   else
3230     readTokenWithJavaScriptASI();
3231   FormatTok->Previous = Previous;
3232 }
3233 
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3234 void UnwrappedLineParser::distributeComments(
3235     const SmallVectorImpl<FormatToken *> &Comments,
3236     const FormatToken *NextTok) {
3237   // Whether or not a line comment token continues a line is controlled by
3238   // the method continuesLineCommentSection, with the following caveat:
3239   //
3240   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3241   // that each comment line from the trail is aligned with the next token, if
3242   // the next token exists. If a trail exists, the beginning of the maximal
3243   // trail is marked as a start of a new comment section.
3244   //
3245   // For example in this code:
3246   //
3247   // int a; // line about a
3248   //   // line 1 about b
3249   //   // line 2 about b
3250   //   int b;
3251   //
3252   // the two lines about b form a maximal trail, so there are two sections, the
3253   // first one consisting of the single comment "// line about a" and the
3254   // second one consisting of the next two comments.
3255   if (Comments.empty())
3256     return;
3257   bool ShouldPushCommentsInCurrentLine = true;
3258   bool HasTrailAlignedWithNextToken = false;
3259   unsigned StartOfTrailAlignedWithNextToken = 0;
3260   if (NextTok) {
3261     // We are skipping the first element intentionally.
3262     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3263       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3264         HasTrailAlignedWithNextToken = true;
3265         StartOfTrailAlignedWithNextToken = i;
3266       }
3267     }
3268   }
3269   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3270     FormatToken *FormatTok = Comments[i];
3271     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3272       FormatTok->ContinuesLineCommentSection = false;
3273     } else {
3274       FormatTok->ContinuesLineCommentSection =
3275           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3276     }
3277     if (!FormatTok->ContinuesLineCommentSection &&
3278         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3279       ShouldPushCommentsInCurrentLine = false;
3280     }
3281     if (ShouldPushCommentsInCurrentLine) {
3282       pushToken(FormatTok);
3283     } else {
3284       CommentsBeforeNextToken.push_back(FormatTok);
3285     }
3286   }
3287 }
3288 
readToken(int LevelDifference)3289 void UnwrappedLineParser::readToken(int LevelDifference) {
3290   SmallVector<FormatToken *, 1> Comments;
3291   do {
3292     FormatTok = Tokens->getNextToken();
3293     assert(FormatTok);
3294     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3295            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3296       distributeComments(Comments, FormatTok);
3297       Comments.clear();
3298       // If there is an unfinished unwrapped line, we flush the preprocessor
3299       // directives only after that unwrapped line was finished later.
3300       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3301       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3302       assert((LevelDifference >= 0 ||
3303               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3304              "LevelDifference makes Line->Level negative");
3305       Line->Level += LevelDifference;
3306       // Comments stored before the preprocessor directive need to be output
3307       // before the preprocessor directive, at the same level as the
3308       // preprocessor directive, as we consider them to apply to the directive.
3309       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3310           PPBranchLevel > 0)
3311         Line->Level += PPBranchLevel;
3312       flushComments(isOnNewLine(*FormatTok));
3313       parsePPDirective();
3314     }
3315     while (FormatTok->getType() == TT_ConflictStart ||
3316            FormatTok->getType() == TT_ConflictEnd ||
3317            FormatTok->getType() == TT_ConflictAlternative) {
3318       if (FormatTok->getType() == TT_ConflictStart) {
3319         conditionalCompilationStart(/*Unreachable=*/false);
3320       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3321         conditionalCompilationAlternative();
3322       } else if (FormatTok->getType() == TT_ConflictEnd) {
3323         conditionalCompilationEnd();
3324       }
3325       FormatTok = Tokens->getNextToken();
3326       FormatTok->MustBreakBefore = true;
3327     }
3328 
3329     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3330         !Line->InPPDirective) {
3331       continue;
3332     }
3333 
3334     if (!FormatTok->Tok.is(tok::comment)) {
3335       distributeComments(Comments, FormatTok);
3336       Comments.clear();
3337       return;
3338     }
3339 
3340     Comments.push_back(FormatTok);
3341   } while (!eof());
3342 
3343   distributeComments(Comments, nullptr);
3344   Comments.clear();
3345 }
3346 
pushToken(FormatToken * Tok)3347 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3348   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3349   if (MustBreakBeforeNextToken) {
3350     Line->Tokens.back().Tok->MustBreakBefore = true;
3351     MustBreakBeforeNextToken = false;
3352   }
3353 }
3354 
3355 } // end namespace format
3356 } // end namespace clang
3357